diff --git "a/logs.jsonl" "b/logs.jsonl" new file mode 100644--- /dev/null +++ "b/logs.jsonl" @@ -0,0 +1,14260 @@ +{'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 736], 'batch_size': 8, 'flops': 14691612894976}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10859018244352}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 624], 'batch_size': 8, 'flops': 12455932682112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11497784019456}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8623338031488}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8623338031488}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10859018244352}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 656], 'batch_size': 8, 'flops': 13094698457216}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 560], 'batch_size': 8, 'flops': 11178401131904}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11497784019456}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 1168], 'batch_size': 8, 'flops': 23314950858880}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 640], 'batch_size': 8, 'flops': 12775315569664}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [6, 208], 'batch_size': 8, 'flops': 4151977605760}], 'timestamp': '2025-09-10 02:34:45.888087', 'step': 0, 'epoch': 0} +{'type': 'pplx', 'content': 54140675.446864516, 'timestamp': '2025-09-10 02:34:45.891761', 'step': 0, 'epoch': 0} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:34:45.995408', 'step': 0, 'epoch': 1} +{'type': 'loss', 'content': 0.6008338332176208, 'timestamp': '2025-09-10 02:34:45.997239', 'step': 1, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:34:46.064445', 'step': 1, 'epoch': 1} +{'type': 'loss', 'content': 0.5395371317863464, 'timestamp': '2025-09-10 02:34:46.066601', 'step': 2, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:34:46.118920', 'step': 2, 'epoch': 1} +{'type': 'loss', 'content': 0.547315239906311, 'timestamp': '2025-09-10 02:34:46.121082', 'step': 3, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:34:46.186525', 'step': 3, 'epoch': 1} +{'type': 'loss', 'content': 0.6588919758796692, 'timestamp': '2025-09-10 02:34:46.235931', 'step': 4, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:34:46.288306', 'step': 4, 'epoch': 1} +{'type': 'loss', 'content': 0.490021288394928, 'timestamp': '2025-09-10 02:34:46.290669', 'step': 5, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:34:46.360797', 'step': 5, 'epoch': 1} +{'type': 'loss', 'content': 0.41430097818374634, 'timestamp': '2025-09-10 02:34:46.362581', 'step': 6, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:34:46.414975', 'step': 6, 'epoch': 1} +{'type': 'loss', 'content': 0.3882477879524231, 'timestamp': '2025-09-10 02:34:46.417293', 'step': 7, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:34:46.482150', 'step': 7, 'epoch': 1} +{'type': 'loss', 'content': 0.5351694822311401, 'timestamp': '2025-09-10 02:34:46.488344', 'step': 8, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:34:46.559251', 'step': 8, 'epoch': 1} +{'type': 'loss', 'content': 0.16230842471122742, 'timestamp': '2025-09-10 02:34:46.561895', 'step': 9, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:34:46.614581', 'step': 9, 'epoch': 1} +{'type': 'loss', 'content': 0.12545782327651978, 'timestamp': '2025-09-10 02:34:46.617434', 'step': 10, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:34:46.669848', 'step': 10, 'epoch': 1} +{'type': 'loss', 'content': 0.12372326105833054, 'timestamp': '2025-09-10 02:34:46.672168', 'step': 11, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:34:46.725226', 'step': 11, 'epoch': 1} +{'type': 'loss', 'content': 0.14071746170520782, 'timestamp': '2025-09-10 02:34:46.730817', 'step': 12, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:34:46.790259', 'step': 12, 'epoch': 1} +{'type': 'loss', 'content': 0.10930095613002777, 'timestamp': '2025-09-10 02:34:46.792423', 'step': 13, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:34:46.860349', 'step': 13, 'epoch': 1} +{'type': 'loss', 'content': 0.07601930946111679, 'timestamp': '2025-09-10 02:34:46.866920', 'step': 14, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:34:46.919533', 'step': 14, 'epoch': 1} +{'type': 'loss', 'content': 0.07199852913618088, 'timestamp': '2025-09-10 02:34:46.921508', 'step': 15, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:34:46.973747', 'step': 15, 'epoch': 1} +{'type': 'loss', 'content': 0.11638077348470688, 'timestamp': '2025-09-10 02:34:46.979695', 'step': 16, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:34:47.041847', 'step': 16, 'epoch': 1} +{'type': 'loss', 'content': 0.05031171813607216, 'timestamp': '2025-09-10 02:34:47.046089', 'step': 17, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:34:47.098775', 'step': 17, 'epoch': 1} +{'type': 'loss', 'content': 0.0463133379817009, 'timestamp': '2025-09-10 02:34:47.100699', 'step': 18, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:34:47.152851', 'step': 18, 'epoch': 1} +{'type': 'loss', 'content': 0.047386061400175095, 'timestamp': '2025-09-10 02:34:47.159013', 'step': 19, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:34:47.211476', 'step': 19, 'epoch': 1} +{'type': 'loss', 'content': 0.10854268074035645, 'timestamp': '2025-09-10 02:34:47.217047', 'step': 20, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:34:47.268622', 'step': 20, 'epoch': 1} +{'type': 'loss', 'content': 0.027190817520022392, 'timestamp': '2025-09-10 02:34:47.274957', 'step': 21, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:34:47.327445', 'step': 21, 'epoch': 1} +{'type': 'loss', 'content': 0.02770153246819973, 'timestamp': '2025-09-10 02:34:47.329262', 'step': 22, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:34:47.381654', 'step': 22, 'epoch': 1} +{'type': 'loss', 'content': 0.029606414958834648, 'timestamp': '2025-09-10 02:34:47.383468', 'step': 23, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:34:47.436729', 'step': 23, 'epoch': 1} +{'type': 'loss', 'content': 0.06987056881189346, 'timestamp': '2025-09-10 02:34:47.442180', 'step': 24, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:34:47.494008', 'step': 24, 'epoch': 1} +{'type': 'loss', 'content': 0.023294363170862198, 'timestamp': '2025-09-10 02:34:47.496000', 'step': 25, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:34:47.548031', 'step': 25, 'epoch': 1} +{'type': 'loss', 'content': 0.09575532376766205, 'timestamp': '2025-09-10 02:34:47.549811', 'step': 26, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:34:47.602244', 'step': 26, 'epoch': 1} +{'type': 'loss', 'content': 0.026402516290545464, 'timestamp': '2025-09-10 02:34:47.604203', 'step': 27, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:34:47.674084', 'step': 27, 'epoch': 1} +{'type': 'loss', 'content': 0.025583965703845024, 'timestamp': '2025-09-10 02:34:47.680169', 'step': 28, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:34:47.735999', 'step': 28, 'epoch': 1} +{'type': 'loss', 'content': 0.05738652125000954, 'timestamp': '2025-09-10 02:34:47.737773', 'step': 29, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:34:47.797646', 'step': 29, 'epoch': 1} +{'type': 'loss', 'content': 0.0032462701201438904, 'timestamp': '2025-09-10 02:34:47.808149', 'step': 30, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:34:47.860369', 'step': 30, 'epoch': 1} +{'type': 'loss', 'content': 0.12516382336616516, 'timestamp': '2025-09-10 02:34:47.862381', 'step': 31, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:34:47.914798', 'step': 31, 'epoch': 1} +{'type': 'loss', 'content': 0.06932564079761505, 'timestamp': '2025-09-10 02:34:47.920255', 'step': 32, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:34:47.971477', 'step': 32, 'epoch': 1} +{'type': 'loss', 'content': 0.01569373533129692, 'timestamp': '2025-09-10 02:34:47.973062', 'step': 33, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:34:48.024914', 'step': 33, 'epoch': 1} +{'type': 'loss', 'content': 0.02847396209836006, 'timestamp': '2025-09-10 02:34:48.027536', 'step': 34, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:34:48.079740', 'step': 34, 'epoch': 1} +{'type': 'loss', 'content': 0.042594462633132935, 'timestamp': '2025-09-10 02:34:48.085977', 'step': 35, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:34:48.138004', 'step': 35, 'epoch': 1} +{'type': 'loss', 'content': 0.04337479919195175, 'timestamp': '2025-09-10 02:34:48.143371', 'step': 36, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:34:48.194982', 'step': 36, 'epoch': 1} +{'type': 'loss', 'content': 0.010617688298225403, 'timestamp': '2025-09-10 02:34:48.197172', 'step': 37, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:34:48.262865', 'step': 37, 'epoch': 1} +{'type': 'loss', 'content': 0.014554865658283234, 'timestamp': '2025-09-10 02:34:48.264854', 'step': 38, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:34:48.317418', 'step': 38, 'epoch': 1} +{'type': 'loss', 'content': 0.029846573248505592, 'timestamp': '2025-09-10 02:34:48.319401', 'step': 39, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:34:48.373782', 'step': 39, 'epoch': 1} +{'type': 'loss', 'content': 0.02213556505739689, 'timestamp': '2025-09-10 02:34:48.379453', 'step': 40, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:34:48.432402', 'step': 40, 'epoch': 1} +{'type': 'loss', 'content': 0.03805265203118324, 'timestamp': '2025-09-10 02:34:48.434127', 'step': 41, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:34:48.487294', 'step': 41, 'epoch': 1} +{'type': 'loss', 'content': 0.02557053044438362, 'timestamp': '2025-09-10 02:34:48.489207', 'step': 42, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:34:48.548893', 'step': 42, 'epoch': 1} +{'type': 'loss', 'content': 0.02088429220020771, 'timestamp': '2025-09-10 02:34:48.559087', 'step': 43, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:34:48.612469', 'step': 43, 'epoch': 1} +{'type': 'loss', 'content': 0.06046781316399574, 'timestamp': '2025-09-10 02:34:48.618350', 'step': 44, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:34:48.670210', 'step': 44, 'epoch': 1} +{'type': 'loss', 'content': 0.021367188543081284, 'timestamp': '2025-09-10 02:34:48.671981', 'step': 45, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:34:48.724711', 'step': 45, 'epoch': 1} +{'type': 'loss', 'content': 0.016253933310508728, 'timestamp': '2025-09-10 02:34:48.726799', 'step': 46, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:34:48.778932', 'step': 46, 'epoch': 1} +{'type': 'loss', 'content': 0.011869534850120544, 'timestamp': '2025-09-10 02:34:48.785059', 'step': 47, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:34:48.837434', 'step': 47, 'epoch': 1} +{'type': 'loss', 'content': 0.029137682169675827, 'timestamp': '2025-09-10 02:34:48.843350', 'step': 48, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:34:48.898093', 'step': 48, 'epoch': 1} +{'type': 'loss', 'content': 0.038361966609954834, 'timestamp': '2025-09-10 02:34:48.902815', 'step': 49, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:34:48.955169', 'step': 49, 'epoch': 1} +{'type': 'loss', 'content': 0.017897581681609154, 'timestamp': '2025-09-10 02:34:48.962992', 'step': 50, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:34:49.015537', 'step': 50, 'epoch': 1} +{'type': 'loss', 'content': 0.03656654804944992, 'timestamp': '2025-09-10 02:34:49.017403', 'step': 51, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 928], 'flops': 18560112737920.0}, 'timestamp': '2025-09-10 02:34:49.148836', 'step': 51, 'epoch': 1} +{'type': 'loss', 'content': 0.02816540002822876, 'timestamp': '2025-09-10 02:34:49.175281', 'step': 52, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:34:49.227457', 'step': 52, 'epoch': 1} +{'type': 'loss', 'content': 0.022283677011728287, 'timestamp': '2025-09-10 02:34:49.229211', 'step': 53, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:34:49.281169', 'step': 53, 'epoch': 1} +{'type': 'loss', 'content': 0.01824142411351204, 'timestamp': '2025-09-10 02:34:49.287282', 'step': 54, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:34:49.339986', 'step': 54, 'epoch': 1} +{'type': 'loss', 'content': 0.027409756556153297, 'timestamp': '2025-09-10 02:34:49.341897', 'step': 55, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-09-10 02:34:49.414029', 'step': 55, 'epoch': 1} +{'type': 'loss', 'content': 0.018066495656967163, 'timestamp': '2025-09-10 02:34:49.426014', 'step': 56, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:34:49.477646', 'step': 56, 'epoch': 1} +{'type': 'loss', 'content': 0.015896065160632133, 'timestamp': '2025-09-10 02:34:49.479600', 'step': 57, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:34:49.531706', 'step': 57, 'epoch': 1} +{'type': 'loss', 'content': 0.032879505306482315, 'timestamp': '2025-09-10 02:34:49.533425', 'step': 58, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:34:49.585304', 'step': 58, 'epoch': 1} +{'type': 'loss', 'content': 0.023104578256607056, 'timestamp': '2025-09-10 02:34:49.591499', 'step': 59, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:34:49.643922', 'step': 59, 'epoch': 1} +{'type': 'loss', 'content': 0.028585180640220642, 'timestamp': '2025-09-10 02:34:49.649619', 'step': 60, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:34:49.701402', 'step': 60, 'epoch': 1} +{'type': 'loss', 'content': 0.030385613441467285, 'timestamp': '2025-09-10 02:34:49.711307', 'step': 61, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:34:49.763354', 'step': 61, 'epoch': 1} +{'type': 'loss', 'content': 0.015409083105623722, 'timestamp': '2025-09-10 02:34:49.765141', 'step': 62, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:34:49.816842', 'step': 62, 'epoch': 1} +{'type': 'loss', 'content': 0.008990316651761532, 'timestamp': '2025-09-10 02:34:49.819669', 'step': 63, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:34:49.871586', 'step': 63, 'epoch': 1} +{'type': 'loss', 'content': 0.027352871373295784, 'timestamp': '2025-09-10 02:34:49.877002', 'step': 64, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-09-10 02:34:49.942990', 'step': 64, 'epoch': 1} +{'type': 'loss', 'content': 0.010400832630693913, 'timestamp': '2025-09-10 02:34:49.956471', 'step': 65, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:34:50.009540', 'step': 65, 'epoch': 1} +{'type': 'loss', 'content': 0.02165437862277031, 'timestamp': '2025-09-10 02:34:50.011377', 'step': 66, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:34:50.063725', 'step': 66, 'epoch': 1} +{'type': 'loss', 'content': 0.03493545949459076, 'timestamp': '2025-09-10 02:34:50.065750', 'step': 67, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:34:50.118296', 'step': 67, 'epoch': 1} +{'type': 'loss', 'content': 0.018001478165388107, 'timestamp': '2025-09-10 02:34:50.124027', 'step': 68, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:34:50.175500', 'step': 68, 'epoch': 1} +{'type': 'loss', 'content': 0.019673990085721016, 'timestamp': '2025-09-10 02:34:50.178349', 'step': 69, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-09-10 02:34:50.245906', 'step': 69, 'epoch': 1} +{'type': 'loss', 'content': 0.01824493706226349, 'timestamp': '2025-09-10 02:34:50.258215', 'step': 70, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:34:50.311020', 'step': 70, 'epoch': 1} +{'type': 'loss', 'content': 0.012586208060383797, 'timestamp': '2025-09-10 02:34:50.312940', 'step': 71, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:34:50.368248', 'step': 71, 'epoch': 1} +{'type': 'loss', 'content': 0.004381082020699978, 'timestamp': '2025-09-10 02:34:50.377212', 'step': 72, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:34:50.428715', 'step': 72, 'epoch': 1} +{'type': 'loss', 'content': 0.031274572014808655, 'timestamp': '2025-09-10 02:34:50.430455', 'step': 73, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:34:50.483000', 'step': 73, 'epoch': 1} +{'type': 'loss', 'content': 0.0065897488966584206, 'timestamp': '2025-09-10 02:34:50.485267', 'step': 74, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:34:50.537626', 'step': 74, 'epoch': 1} +{'type': 'loss', 'content': 0.03049614652991295, 'timestamp': '2025-09-10 02:34:50.545530', 'step': 75, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-09-10 02:34:50.611566', 'step': 75, 'epoch': 1} +{'type': 'loss', 'content': 0.016821924597024918, 'timestamp': '2025-09-10 02:34:50.624379', 'step': 76, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:34:50.676079', 'step': 76, 'epoch': 1} +{'type': 'loss', 'content': 0.024387311190366745, 'timestamp': '2025-09-10 02:34:50.677738', 'step': 77, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:34:50.729816', 'step': 77, 'epoch': 1} +{'type': 'loss', 'content': 0.034385766834020615, 'timestamp': '2025-09-10 02:34:50.731588', 'step': 78, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:34:50.788552', 'step': 78, 'epoch': 1} +{'type': 'loss', 'content': 0.025617972016334534, 'timestamp': '2025-09-10 02:34:50.798736', 'step': 79, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-09-10 02:34:50.864771', 'step': 79, 'epoch': 1} +{'type': 'loss', 'content': 0.024418707937002182, 'timestamp': '2025-09-10 02:34:50.877538', 'step': 80, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:34:50.929714', 'step': 80, 'epoch': 1} +{'type': 'loss', 'content': 0.01649116910994053, 'timestamp': '2025-09-10 02:34:50.931631', 'step': 81, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:34:50.984309', 'step': 81, 'epoch': 1} +{'type': 'loss', 'content': 0.025838321074843407, 'timestamp': '2025-09-10 02:34:50.993752', 'step': 82, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:34:51.046019', 'step': 82, 'epoch': 1} +{'type': 'loss', 'content': 0.022280815988779068, 'timestamp': '2025-09-10 02:34:51.052226', 'step': 83, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:34:51.111822', 'step': 83, 'epoch': 1} +{'type': 'loss', 'content': 0.013495906256139278, 'timestamp': '2025-09-10 02:34:51.117327', 'step': 84, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:34:51.171830', 'step': 84, 'epoch': 1} +{'type': 'loss', 'content': 0.023006485775113106, 'timestamp': '2025-09-10 02:34:51.173780', 'step': 85, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:34:51.233162', 'step': 85, 'epoch': 1} +{'type': 'loss', 'content': 0.03280184417963028, 'timestamp': '2025-09-10 02:34:51.243689', 'step': 86, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:34:51.298413', 'step': 86, 'epoch': 1} +{'type': 'loss', 'content': 0.013478524051606655, 'timestamp': '2025-09-10 02:34:51.300198', 'step': 87, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:34:51.353211', 'step': 87, 'epoch': 1} +{'type': 'loss', 'content': 0.014801465906202793, 'timestamp': '2025-09-10 02:34:51.358968', 'step': 88, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 496], 'flops': 9920060287936.0}, 'timestamp': '2025-09-10 02:34:51.432966', 'step': 88, 'epoch': 1} +{'type': 'loss', 'content': 0.028322791680693626, 'timestamp': '2025-09-10 02:34:51.448008', 'step': 89, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:34:51.501354', 'step': 89, 'epoch': 1} +{'type': 'loss', 'content': 0.020225470885634422, 'timestamp': '2025-09-10 02:34:51.504548', 'step': 90, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:34:51.557316', 'step': 90, 'epoch': 1} +{'type': 'loss', 'content': 0.016057122498750687, 'timestamp': '2025-09-10 02:34:51.559722', 'step': 91, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:34:51.617615', 'step': 91, 'epoch': 1} +{'type': 'loss', 'content': 0.019442101940512657, 'timestamp': '2025-09-10 02:34:51.628625', 'step': 92, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:34:51.681629', 'step': 92, 'epoch': 1} +{'type': 'loss', 'content': 0.027180684730410576, 'timestamp': '2025-09-10 02:34:51.687491', 'step': 93, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:34:51.742098', 'step': 93, 'epoch': 1} +{'type': 'loss', 'content': 0.016686907038092613, 'timestamp': '2025-09-10 02:34:51.743935', 'step': 94, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 480], 'flops': 9600058345344.0}, 'timestamp': '2025-09-10 02:34:51.817839', 'step': 94, 'epoch': 1} +{'type': 'loss', 'content': 0.014631020836532116, 'timestamp': '2025-09-10 02:34:51.831339', 'step': 95, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:34:51.884016', 'step': 95, 'epoch': 1} +{'type': 'loss', 'content': 0.020649760961532593, 'timestamp': '2025-09-10 02:34:51.889349', 'step': 96, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:34:51.940557', 'step': 96, 'epoch': 1} +{'type': 'loss', 'content': 0.03311503306031227, 'timestamp': '2025-09-10 02:34:51.943354', 'step': 97, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:34:51.996011', 'step': 97, 'epoch': 1} +{'type': 'loss', 'content': 0.02003851905465126, 'timestamp': '2025-09-10 02:34:51.999153', 'step': 98, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:34:52.052283', 'step': 98, 'epoch': 1} +{'type': 'loss', 'content': 0.01943128928542137, 'timestamp': '2025-09-10 02:34:52.054142', 'step': 99, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:34:52.107004', 'step': 99, 'epoch': 1} +{'type': 'loss', 'content': 0.02221931517124176, 'timestamp': '2025-09-10 02:34:52.112823', 'step': 100, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:34:52.164486', 'step': 100, 'epoch': 1} +{'type': 'loss', 'content': 0.031217534095048904, 'timestamp': '2025-09-10 02:34:52.166360', 'step': 101, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:34:52.218710', 'step': 101, 'epoch': 1} +{'type': 'loss', 'content': 0.02912614308297634, 'timestamp': '2025-09-10 02:34:52.220332', 'step': 102, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:34:52.281198', 'step': 102, 'epoch': 1} +{'type': 'loss', 'content': 0.036009423434734344, 'timestamp': '2025-09-10 02:34:52.283813', 'step': 103, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:34:52.336329', 'step': 103, 'epoch': 1} +{'type': 'loss', 'content': 0.017426682636141777, 'timestamp': '2025-09-10 02:34:52.346519', 'step': 104, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:34:52.398571', 'step': 104, 'epoch': 1} +{'type': 'loss', 'content': 0.008325684815645218, 'timestamp': '2025-09-10 02:34:52.406394', 'step': 105, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:34:52.459271', 'step': 105, 'epoch': 1} +{'type': 'loss', 'content': 0.017156464979052544, 'timestamp': '2025-09-10 02:34:52.461167', 'step': 106, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:34:52.513476', 'step': 106, 'epoch': 1} +{'type': 'loss', 'content': 0.023122547194361687, 'timestamp': '2025-09-10 02:34:52.515666', 'step': 107, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:34:52.567779', 'step': 107, 'epoch': 1} +{'type': 'loss', 'content': 0.029338719323277473, 'timestamp': '2025-09-10 02:34:52.573579', 'step': 108, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:34:52.625222', 'step': 108, 'epoch': 1} +{'type': 'loss', 'content': 0.01162067148834467, 'timestamp': '2025-09-10 02:34:52.631557', 'step': 109, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:34:52.684166', 'step': 109, 'epoch': 1} +{'type': 'loss', 'content': 0.009132458828389645, 'timestamp': '2025-09-10 02:34:52.686029', 'step': 110, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:34:52.739827', 'step': 110, 'epoch': 1} +{'type': 'loss', 'content': 0.012252326123416424, 'timestamp': '2025-09-10 02:34:52.749420', 'step': 111, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:34:52.802490', 'step': 111, 'epoch': 1} +{'type': 'loss', 'content': 0.00482892943546176, 'timestamp': '2025-09-10 02:34:52.808173', 'step': 112, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:34:52.859717', 'step': 112, 'epoch': 1} +{'type': 'loss', 'content': 0.027315771207213402, 'timestamp': '2025-09-10 02:34:52.861550', 'step': 113, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:34:52.914055', 'step': 113, 'epoch': 1} +{'type': 'loss', 'content': 0.027633002027869225, 'timestamp': '2025-09-10 02:34:52.916177', 'step': 114, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:34:52.968934', 'step': 114, 'epoch': 1} +{'type': 'loss', 'content': 0.01256472896784544, 'timestamp': '2025-09-10 02:34:52.970926', 'step': 115, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:34:53.023268', 'step': 115, 'epoch': 1} +{'type': 'loss', 'content': 0.022422954440116882, 'timestamp': '2025-09-10 02:34:53.028842', 'step': 116, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:34:53.084995', 'step': 116, 'epoch': 1} +{'type': 'loss', 'content': 0.020762605592608452, 'timestamp': '2025-09-10 02:34:53.095971', 'step': 117, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:34:53.150162', 'step': 117, 'epoch': 1} +{'type': 'loss', 'content': 0.03215666860342026, 'timestamp': '2025-09-10 02:34:53.153151', 'step': 118, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:34:53.208917', 'step': 118, 'epoch': 1} +{'type': 'loss', 'content': 0.01391049288213253, 'timestamp': '2025-09-10 02:34:53.218477', 'step': 119, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:34:53.276156', 'step': 119, 'epoch': 1} +{'type': 'loss', 'content': 0.021440556272864342, 'timestamp': '2025-09-10 02:34:53.287185', 'step': 120, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:34:53.339259', 'step': 120, 'epoch': 1} +{'type': 'loss', 'content': 0.0282118059694767, 'timestamp': '2025-09-10 02:34:53.349203', 'step': 121, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:34:53.401702', 'step': 121, 'epoch': 1} +{'type': 'loss', 'content': 0.009898746386170387, 'timestamp': '2025-09-10 02:34:53.407994', 'step': 122, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:34:53.460467', 'step': 122, 'epoch': 1} +{'type': 'loss', 'content': 0.017764005810022354, 'timestamp': '2025-09-10 02:34:53.462505', 'step': 123, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:34:53.515071', 'step': 123, 'epoch': 1} +{'type': 'loss', 'content': 0.013925476931035519, 'timestamp': '2025-09-10 02:34:53.523774', 'step': 124, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:34:53.576828', 'step': 124, 'epoch': 1} +{'type': 'loss', 'content': 0.046622104942798615, 'timestamp': '2025-09-10 02:34:53.578762', 'step': 125, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:34:53.646326', 'step': 125, 'epoch': 1} +{'type': 'loss', 'content': 0.016143329441547394, 'timestamp': '2025-09-10 02:34:53.648300', 'step': 126, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-09-10 02:34:53.710054', 'step': 126, 'epoch': 1} +{'type': 'loss', 'content': 0.02423417940735817, 'timestamp': '2025-09-10 02:34:53.720946', 'step': 127, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:34:53.773868', 'step': 127, 'epoch': 1} +{'type': 'loss', 'content': 0.014292905107140541, 'timestamp': '2025-09-10 02:34:53.779825', 'step': 128, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:34:53.833318', 'step': 128, 'epoch': 1} +{'type': 'loss', 'content': 0.025560855865478516, 'timestamp': '2025-09-10 02:34:53.843616', 'step': 129, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:34:53.896737', 'step': 129, 'epoch': 1} +{'type': 'loss', 'content': 0.029963672161102295, 'timestamp': '2025-09-10 02:34:53.898917', 'step': 130, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:34:53.953559', 'step': 130, 'epoch': 1} +{'type': 'loss', 'content': 0.017245622351765633, 'timestamp': '2025-09-10 02:34:53.955367', 'step': 131, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:34:54.007814', 'step': 131, 'epoch': 1} +{'type': 'loss', 'content': 0.013878666795790195, 'timestamp': '2025-09-10 02:34:54.013700', 'step': 132, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:34:54.066441', 'step': 132, 'epoch': 1} +{'type': 'loss', 'content': 0.026786338537931442, 'timestamp': '2025-09-10 02:34:54.068475', 'step': 133, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:34:54.121367', 'step': 133, 'epoch': 1} +{'type': 'loss', 'content': 0.014176661148667336, 'timestamp': '2025-09-10 02:34:54.124126', 'step': 134, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:34:54.177741', 'step': 134, 'epoch': 1} +{'type': 'loss', 'content': 0.015191344544291496, 'timestamp': '2025-09-10 02:34:54.187152', 'step': 135, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:34:54.240469', 'step': 135, 'epoch': 1} +{'type': 'loss', 'content': 0.01789441891014576, 'timestamp': '2025-09-10 02:34:54.246496', 'step': 136, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:34:54.298462', 'step': 136, 'epoch': 1} +{'type': 'loss', 'content': 0.010462201200425625, 'timestamp': '2025-09-10 02:34:54.308481', 'step': 137, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:34:54.362454', 'step': 137, 'epoch': 1} +{'type': 'loss', 'content': 0.012132969684898853, 'timestamp': '2025-09-10 02:34:54.364524', 'step': 138, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:34:54.417439', 'step': 138, 'epoch': 1} +{'type': 'loss', 'content': 0.005434608552604914, 'timestamp': '2025-09-10 02:34:54.419568', 'step': 139, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:34:54.472660', 'step': 139, 'epoch': 1} +{'type': 'loss', 'content': 0.017947377637028694, 'timestamp': '2025-09-10 02:34:54.478745', 'step': 140, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:34:54.537342', 'step': 140, 'epoch': 1} +{'type': 'loss', 'content': 0.02349710650742054, 'timestamp': '2025-09-10 02:34:54.548743', 'step': 141, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:34:54.601777', 'step': 141, 'epoch': 1} +{'type': 'loss', 'content': 0.010277139954268932, 'timestamp': '2025-09-10 02:34:54.603774', 'step': 142, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:34:54.656381', 'step': 142, 'epoch': 1} +{'type': 'loss', 'content': 0.03177471458911896, 'timestamp': '2025-09-10 02:34:54.658539', 'step': 143, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:34:54.710999', 'step': 143, 'epoch': 1} +{'type': 'loss', 'content': 0.027295321226119995, 'timestamp': '2025-09-10 02:34:54.716868', 'step': 144, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:34:54.769275', 'step': 144, 'epoch': 1} +{'type': 'loss', 'content': 0.009652199223637581, 'timestamp': '2025-09-10 02:34:54.775651', 'step': 145, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:34:54.828783', 'step': 145, 'epoch': 1} +{'type': 'loss', 'content': 0.03103617951273918, 'timestamp': '2025-09-10 02:34:54.830682', 'step': 146, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:34:54.883225', 'step': 146, 'epoch': 1} +{'type': 'loss', 'content': 0.023361459374427795, 'timestamp': '2025-09-10 02:34:54.889589', 'step': 147, 'epoch': 1} +{'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 736], 'batch_size': 8, 'flops': 14691612894976}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10859018244352}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 624], 'batch_size': 8, 'flops': 12455932682112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11497784019456}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8623338031488}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8623338031488}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10859018244352}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 656], 'batch_size': 8, 'flops': 13094698457216}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 560], 'batch_size': 8, 'flops': 11178401131904}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11497784019456}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 1168], 'batch_size': 8, 'flops': 23314950858880}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 640], 'batch_size': 8, 'flops': 12775315569664}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [6, 208], 'batch_size': 8, 'flops': 4151977605760}], 'timestamp': '2025-09-10 02:35:11.621207', 'step': 147, 'epoch': 1} +{'type': 'pplx', 'content': 19670390.152276028, 'timestamp': '2025-09-10 02:35:11.624260', 'step': 147, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:35:11.678967', 'step': 147, 'epoch': 1} +{'type': 'loss', 'content': 0.010303986258804798, 'timestamp': '2025-09-10 02:35:11.685027', 'step': 148, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:35:11.737709', 'step': 148, 'epoch': 1} +{'type': 'loss', 'content': 0.009087040089070797, 'timestamp': '2025-09-10 02:35:11.739700', 'step': 149, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:35:11.793579', 'step': 149, 'epoch': 1} +{'type': 'loss', 'content': 0.01798897422850132, 'timestamp': '2025-09-10 02:35:11.795789', 'step': 150, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-09-10 02:35:11.858128', 'step': 150, 'epoch': 1} +{'type': 'loss', 'content': 0.03148674964904785, 'timestamp': '2025-09-10 02:35:11.869067', 'step': 151, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:35:11.922495', 'step': 151, 'epoch': 1} +{'type': 'loss', 'content': 0.01954035647213459, 'timestamp': '2025-09-10 02:35:11.929800', 'step': 152, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:35:11.981637', 'step': 152, 'epoch': 1} +{'type': 'loss', 'content': 0.03088425099849701, 'timestamp': '2025-09-10 02:35:11.983531', 'step': 153, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:35:12.035902', 'step': 153, 'epoch': 1} +{'type': 'loss', 'content': 0.022318672388792038, 'timestamp': '2025-09-10 02:35:12.038107', 'step': 154, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:35:12.092568', 'step': 154, 'epoch': 1} +{'type': 'loss', 'content': 0.00890205055475235, 'timestamp': '2025-09-10 02:35:12.102410', 'step': 155, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:35:12.155800', 'step': 155, 'epoch': 1} +{'type': 'loss', 'content': 0.026110004633665085, 'timestamp': '2025-09-10 02:35:12.161784', 'step': 156, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:35:12.213673', 'step': 156, 'epoch': 1} +{'type': 'loss', 'content': 0.027041813358664513, 'timestamp': '2025-09-10 02:35:12.215971', 'step': 157, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:35:12.269231', 'step': 157, 'epoch': 1} +{'type': 'loss', 'content': 0.013711978681385517, 'timestamp': '2025-09-10 02:35:12.278827', 'step': 158, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:35:12.333247', 'step': 158, 'epoch': 1} +{'type': 'loss', 'content': 0.01726366952061653, 'timestamp': '2025-09-10 02:35:12.343030', 'step': 159, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:35:12.396684', 'step': 159, 'epoch': 1} +{'type': 'loss', 'content': 0.01840554177761078, 'timestamp': '2025-09-10 02:35:12.402696', 'step': 160, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:35:12.455600', 'step': 160, 'epoch': 1} +{'type': 'loss', 'content': 0.03681541234254837, 'timestamp': '2025-09-10 02:35:12.457692', 'step': 161, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:35:12.511047', 'step': 161, 'epoch': 1} +{'type': 'loss', 'content': 0.00946278590708971, 'timestamp': '2025-09-10 02:35:12.513105', 'step': 162, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:35:12.565948', 'step': 162, 'epoch': 1} +{'type': 'loss', 'content': 0.03138422220945358, 'timestamp': '2025-09-10 02:35:12.568150', 'step': 163, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:35:12.620901', 'step': 163, 'epoch': 1} +{'type': 'loss', 'content': 0.013762605376541615, 'timestamp': '2025-09-10 02:35:12.626911', 'step': 164, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:35:12.678627', 'step': 164, 'epoch': 1} +{'type': 'loss', 'content': 0.0059081860817968845, 'timestamp': '2025-09-10 02:35:12.680461', 'step': 165, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:35:12.733022', 'step': 165, 'epoch': 1} +{'type': 'loss', 'content': 0.018755516037344933, 'timestamp': '2025-09-10 02:35:12.739660', 'step': 166, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:35:12.793157', 'step': 166, 'epoch': 1} +{'type': 'loss', 'content': 0.02642093226313591, 'timestamp': '2025-09-10 02:35:12.802772', 'step': 167, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:35:12.862505', 'step': 167, 'epoch': 1} +{'type': 'loss', 'content': 0.013496960513293743, 'timestamp': '2025-09-10 02:35:12.874018', 'step': 168, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:35:12.930412', 'step': 168, 'epoch': 1} +{'type': 'loss', 'content': 0.041730333119630814, 'timestamp': '2025-09-10 02:35:12.941648', 'step': 169, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:35:12.994841', 'step': 169, 'epoch': 1} +{'type': 'loss', 'content': 0.0057114423252642155, 'timestamp': '2025-09-10 02:35:12.997263', 'step': 170, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:35:13.051186', 'step': 170, 'epoch': 1} +{'type': 'loss', 'content': 0.026038197800517082, 'timestamp': '2025-09-10 02:35:13.053365', 'step': 171, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:35:13.107940', 'step': 171, 'epoch': 1} +{'type': 'loss', 'content': 0.013238680548965931, 'timestamp': '2025-09-10 02:35:13.118516', 'step': 172, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:35:13.178000', 'step': 172, 'epoch': 1} +{'type': 'loss', 'content': 0.014438546262681484, 'timestamp': '2025-09-10 02:35:13.189579', 'step': 173, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:35:13.242761', 'step': 173, 'epoch': 1} +{'type': 'loss', 'content': 0.012119495309889317, 'timestamp': '2025-09-10 02:35:13.244698', 'step': 174, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:35:13.297684', 'step': 174, 'epoch': 1} +{'type': 'loss', 'content': 0.021922742947936058, 'timestamp': '2025-09-10 02:35:13.299650', 'step': 175, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:35:13.353561', 'step': 175, 'epoch': 1} +{'type': 'loss', 'content': 0.04477754235267639, 'timestamp': '2025-09-10 02:35:13.364128', 'step': 176, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:35:13.415938', 'step': 176, 'epoch': 1} +{'type': 'loss', 'content': 0.010975569486618042, 'timestamp': '2025-09-10 02:35:13.418070', 'step': 177, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 608], 'flops': 12160073886080.0}, 'timestamp': '2025-09-10 02:35:13.507803', 'step': 177, 'epoch': 1} +{'type': 'loss', 'content': 0.02194954827427864, 'timestamp': '2025-09-10 02:35:13.524962', 'step': 178, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:35:13.579344', 'step': 178, 'epoch': 1} +{'type': 'loss', 'content': 0.02227923832833767, 'timestamp': '2025-09-10 02:35:13.589177', 'step': 179, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:35:13.641729', 'step': 179, 'epoch': 1} +{'type': 'loss', 'content': 0.011062792502343655, 'timestamp': '2025-09-10 02:35:13.647609', 'step': 180, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:35:13.699971', 'step': 180, 'epoch': 1} +{'type': 'loss', 'content': 0.030542535707354546, 'timestamp': '2025-09-10 02:35:13.702148', 'step': 181, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:35:13.762170', 'step': 181, 'epoch': 1} +{'type': 'loss', 'content': 0.037561897188425064, 'timestamp': '2025-09-10 02:35:13.772894', 'step': 182, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-09-10 02:35:13.834136', 'step': 182, 'epoch': 1} +{'type': 'loss', 'content': 0.021802807226777077, 'timestamp': '2025-09-10 02:35:13.845274', 'step': 183, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:35:13.898490', 'step': 183, 'epoch': 1} +{'type': 'loss', 'content': 0.03886691480875015, 'timestamp': '2025-09-10 02:35:13.904351', 'step': 184, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:35:13.961312', 'step': 184, 'epoch': 1} +{'type': 'loss', 'content': 0.025196580216288567, 'timestamp': '2025-09-10 02:35:13.963505', 'step': 185, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:35:14.016439', 'step': 185, 'epoch': 1} +{'type': 'loss', 'content': 0.01903093047440052, 'timestamp': '2025-09-10 02:35:14.018578', 'step': 186, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:35:14.071012', 'step': 186, 'epoch': 1} +{'type': 'loss', 'content': 0.0020099657122045755, 'timestamp': '2025-09-10 02:35:14.072908', 'step': 187, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:35:14.125180', 'step': 187, 'epoch': 1} +{'type': 'loss', 'content': 0.01556878350675106, 'timestamp': '2025-09-10 02:35:14.132569', 'step': 188, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:35:14.184238', 'step': 188, 'epoch': 1} +{'type': 'loss', 'content': 0.02499142661690712, 'timestamp': '2025-09-10 02:35:14.186226', 'step': 189, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:35:14.238459', 'step': 189, 'epoch': 1} +{'type': 'loss', 'content': 0.024990113452076912, 'timestamp': '2025-09-10 02:35:14.241607', 'step': 190, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:35:14.293976', 'step': 190, 'epoch': 1} +{'type': 'loss', 'content': 0.00737761938944459, 'timestamp': '2025-09-10 02:35:14.296285', 'step': 191, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-09-10 02:35:14.356894', 'step': 191, 'epoch': 1} +{'type': 'loss', 'content': 0.015332886017858982, 'timestamp': '2025-09-10 02:35:14.368533', 'step': 192, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:35:14.420478', 'step': 192, 'epoch': 1} +{'type': 'loss', 'content': 0.020425716415047646, 'timestamp': '2025-09-10 02:35:14.423438', 'step': 193, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:35:14.476022', 'step': 193, 'epoch': 1} +{'type': 'loss', 'content': 0.017640886828303337, 'timestamp': '2025-09-10 02:35:14.478043', 'step': 194, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:35:14.530606', 'step': 194, 'epoch': 1} +{'type': 'loss', 'content': 0.013645542785525322, 'timestamp': '2025-09-10 02:35:14.532726', 'step': 195, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:35:14.585391', 'step': 195, 'epoch': 1} +{'type': 'loss', 'content': 0.023901494219899178, 'timestamp': '2025-09-10 02:35:14.591286', 'step': 196, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:35:14.643567', 'step': 196, 'epoch': 1} +{'type': 'loss', 'content': 0.05236082524061203, 'timestamp': '2025-09-10 02:35:14.649894', 'step': 197, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:35:14.702833', 'step': 197, 'epoch': 1} +{'type': 'loss', 'content': 0.0183752179145813, 'timestamp': '2025-09-10 02:35:14.704928', 'step': 198, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:35:14.757323', 'step': 198, 'epoch': 1} +{'type': 'loss', 'content': 0.01529760006815195, 'timestamp': '2025-09-10 02:35:14.759418', 'step': 199, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:35:14.812770', 'step': 199, 'epoch': 1} +{'type': 'loss', 'content': 0.018185382708907127, 'timestamp': '2025-09-10 02:35:14.821800', 'step': 200, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:35:14.874427', 'step': 200, 'epoch': 1} +{'type': 'loss', 'content': 0.020868321880698204, 'timestamp': '2025-09-10 02:35:14.876542', 'step': 201, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:35:14.929175', 'step': 201, 'epoch': 1} +{'type': 'loss', 'content': 0.008937294594943523, 'timestamp': '2025-09-10 02:35:14.931334', 'step': 202, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:35:14.984624', 'step': 202, 'epoch': 1} +{'type': 'loss', 'content': 0.008630351163446903, 'timestamp': '2025-09-10 02:35:14.986893', 'step': 203, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:35:15.040527', 'step': 203, 'epoch': 1} +{'type': 'loss', 'content': 0.020822303369641304, 'timestamp': '2025-09-10 02:35:15.046383', 'step': 204, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:35:15.098040', 'step': 204, 'epoch': 1} +{'type': 'loss', 'content': 0.004343842621892691, 'timestamp': '2025-09-10 02:35:15.101110', 'step': 205, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:35:15.153775', 'step': 205, 'epoch': 1} +{'type': 'loss', 'content': 0.0031640089582651854, 'timestamp': '2025-09-10 02:35:15.155840', 'step': 206, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:35:15.208497', 'step': 206, 'epoch': 1} +{'type': 'loss', 'content': 0.01837976649403572, 'timestamp': '2025-09-10 02:35:15.210770', 'step': 207, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:35:15.264206', 'step': 207, 'epoch': 1} +{'type': 'loss', 'content': 0.018046779558062553, 'timestamp': '2025-09-10 02:35:15.274523', 'step': 208, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:35:15.327275', 'step': 208, 'epoch': 1} +{'type': 'loss', 'content': 0.0076715522445738316, 'timestamp': '2025-09-10 02:35:15.329971', 'step': 209, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:35:15.383139', 'step': 209, 'epoch': 1} +{'type': 'loss', 'content': 0.0058698770590126514, 'timestamp': '2025-09-10 02:35:15.385333', 'step': 210, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:35:15.437789', 'step': 210, 'epoch': 1} +{'type': 'loss', 'content': 0.03286213427782059, 'timestamp': '2025-09-10 02:35:15.440810', 'step': 211, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:35:15.493525', 'step': 211, 'epoch': 1} +{'type': 'loss', 'content': 0.015056388452649117, 'timestamp': '2025-09-10 02:35:15.499408', 'step': 212, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:35:15.551083', 'step': 212, 'epoch': 1} +{'type': 'loss', 'content': 0.005245543550699949, 'timestamp': '2025-09-10 02:35:15.553447', 'step': 213, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:35:15.606607', 'step': 213, 'epoch': 1} +{'type': 'loss', 'content': 0.025814538821578026, 'timestamp': '2025-09-10 02:35:15.609007', 'step': 214, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:35:15.661618', 'step': 214, 'epoch': 1} +{'type': 'loss', 'content': 0.01814667321741581, 'timestamp': '2025-09-10 02:35:15.664108', 'step': 215, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:35:15.716762', 'step': 215, 'epoch': 1} +{'type': 'loss', 'content': 0.02777264080941677, 'timestamp': '2025-09-10 02:35:15.722633', 'step': 216, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:35:15.774598', 'step': 216, 'epoch': 1} +{'type': 'loss', 'content': 0.0160059817135334, 'timestamp': '2025-09-10 02:35:15.776706', 'step': 217, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:35:15.834994', 'step': 217, 'epoch': 1} +{'type': 'loss', 'content': 0.046521928161382675, 'timestamp': '2025-09-10 02:35:15.845367', 'step': 218, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:35:15.898771', 'step': 218, 'epoch': 1} +{'type': 'loss', 'content': 0.008506479673087597, 'timestamp': '2025-09-10 02:35:15.901016', 'step': 219, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:35:15.955431', 'step': 219, 'epoch': 1} +{'type': 'loss', 'content': 0.014576002024114132, 'timestamp': '2025-09-10 02:35:15.965969', 'step': 220, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:35:16.024658', 'step': 220, 'epoch': 1} +{'type': 'loss', 'content': 0.004695187322795391, 'timestamp': '2025-09-10 02:35:16.036168', 'step': 221, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:35:16.089549', 'step': 221, 'epoch': 1} +{'type': 'loss', 'content': 0.030469149351119995, 'timestamp': '2025-09-10 02:35:16.091624', 'step': 222, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:35:16.144707', 'step': 222, 'epoch': 1} +{'type': 'loss', 'content': 0.00731209060177207, 'timestamp': '2025-09-10 02:35:16.146884', 'step': 223, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:35:16.199407', 'step': 223, 'epoch': 1} +{'type': 'loss', 'content': 0.00895555317401886, 'timestamp': '2025-09-10 02:35:16.205364', 'step': 224, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 528], 'flops': 10560064173120.0}, 'timestamp': '2025-09-10 02:35:16.286947', 'step': 224, 'epoch': 1} +{'type': 'loss', 'content': 0.014731078408658504, 'timestamp': '2025-09-10 02:35:16.303400', 'step': 225, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:35:16.357335', 'step': 225, 'epoch': 1} +{'type': 'loss', 'content': 0.03776868060231209, 'timestamp': '2025-09-10 02:35:16.365322', 'step': 226, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:35:16.417941', 'step': 226, 'epoch': 1} +{'type': 'loss', 'content': 0.006093989592045546, 'timestamp': '2025-09-10 02:35:16.425723', 'step': 227, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:35:16.478478', 'step': 227, 'epoch': 1} +{'type': 'loss', 'content': 0.016133157536387444, 'timestamp': '2025-09-10 02:35:16.484402', 'step': 228, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:35:16.541287', 'step': 228, 'epoch': 1} +{'type': 'loss', 'content': 0.025382960215210915, 'timestamp': '2025-09-10 02:35:16.552471', 'step': 229, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:35:16.607586', 'step': 229, 'epoch': 1} +{'type': 'loss', 'content': 0.03177328780293465, 'timestamp': '2025-09-10 02:35:16.617371', 'step': 230, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-09-10 02:35:16.684221', 'step': 230, 'epoch': 1} +{'type': 'loss', 'content': 0.005937655922025442, 'timestamp': '2025-09-10 02:35:16.696469', 'step': 231, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:35:16.749465', 'step': 231, 'epoch': 1} +{'type': 'loss', 'content': 0.021884040907025337, 'timestamp': '2025-09-10 02:35:16.756417', 'step': 232, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:35:16.828347', 'step': 232, 'epoch': 1} +{'type': 'loss', 'content': 0.033356085419654846, 'timestamp': '2025-09-10 02:35:16.838266', 'step': 233, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:35:16.897654', 'step': 233, 'epoch': 1} +{'type': 'loss', 'content': 0.017480988055467606, 'timestamp': '2025-09-10 02:35:16.907205', 'step': 234, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:35:16.973785', 'step': 234, 'epoch': 1} +{'type': 'loss', 'content': 0.04444620758295059, 'timestamp': '2025-09-10 02:35:16.976021', 'step': 235, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:35:17.029330', 'step': 235, 'epoch': 1} +{'type': 'loss', 'content': 0.02271200343966484, 'timestamp': '2025-09-10 02:35:17.035477', 'step': 236, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:35:17.087888', 'step': 236, 'epoch': 1} +{'type': 'loss', 'content': 0.02542167343199253, 'timestamp': '2025-09-10 02:35:17.090626', 'step': 237, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:35:17.143441', 'step': 237, 'epoch': 1} +{'type': 'loss', 'content': 0.014857185073196888, 'timestamp': '2025-09-10 02:35:17.145527', 'step': 238, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:35:17.198064', 'step': 238, 'epoch': 1} +{'type': 'loss', 'content': 0.008329623378813267, 'timestamp': '2025-09-10 02:35:17.200133', 'step': 239, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-09-10 02:35:17.268370', 'step': 239, 'epoch': 1} +{'type': 'loss', 'content': 0.017592785879969597, 'timestamp': '2025-09-10 02:35:17.280828', 'step': 240, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:35:17.334901', 'step': 240, 'epoch': 1} +{'type': 'loss', 'content': 0.006798648275434971, 'timestamp': '2025-09-10 02:35:17.337668', 'step': 241, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:35:17.389774', 'step': 241, 'epoch': 1} +{'type': 'loss', 'content': 0.028966281563043594, 'timestamp': '2025-09-10 02:35:17.391883', 'step': 242, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:35:17.444531', 'step': 242, 'epoch': 1} +{'type': 'loss', 'content': 0.027059515938162804, 'timestamp': '2025-09-10 02:35:17.447015', 'step': 243, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:35:17.499692', 'step': 243, 'epoch': 1} +{'type': 'loss', 'content': 0.027755940333008766, 'timestamp': '2025-09-10 02:35:17.507136', 'step': 244, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:35:17.561771', 'step': 244, 'epoch': 1} +{'type': 'loss', 'content': 0.03391401469707489, 'timestamp': '2025-09-10 02:35:17.563767', 'step': 245, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:35:17.616108', 'step': 245, 'epoch': 1} +{'type': 'loss', 'content': 0.01781395450234413, 'timestamp': '2025-09-10 02:35:17.618339', 'step': 246, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:35:17.671172', 'step': 246, 'epoch': 1} +{'type': 'loss', 'content': 0.012044159695506096, 'timestamp': '2025-09-10 02:35:17.674272', 'step': 247, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-09-10 02:35:17.742367', 'step': 247, 'epoch': 1} +{'type': 'loss', 'content': 0.008590635843575, 'timestamp': '2025-09-10 02:35:17.755742', 'step': 248, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:35:17.808358', 'step': 248, 'epoch': 1} +{'type': 'loss', 'content': 0.03630336374044418, 'timestamp': '2025-09-10 02:35:17.810532', 'step': 249, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:35:17.868550', 'step': 249, 'epoch': 1} +{'type': 'loss', 'content': 0.009653487242758274, 'timestamp': '2025-09-10 02:35:17.879016', 'step': 250, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:35:17.932146', 'step': 250, 'epoch': 1} +{'type': 'loss', 'content': 0.0035890305880457163, 'timestamp': '2025-09-10 02:35:17.934204', 'step': 251, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:35:17.988605', 'step': 251, 'epoch': 1} +{'type': 'loss', 'content': 0.0024094083346426487, 'timestamp': '2025-09-10 02:35:17.999245', 'step': 252, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:35:18.051281', 'step': 252, 'epoch': 1} +{'type': 'loss', 'content': 0.00296982005238533, 'timestamp': '2025-09-10 02:35:18.053541', 'step': 253, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:35:18.106193', 'step': 253, 'epoch': 1} +{'type': 'loss', 'content': 0.02122844196856022, 'timestamp': '2025-09-10 02:35:18.111288', 'step': 254, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:35:18.163941', 'step': 254, 'epoch': 1} +{'type': 'loss', 'content': 0.019904855638742447, 'timestamp': '2025-09-10 02:35:18.166123', 'step': 255, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:35:18.218451', 'step': 255, 'epoch': 1} +{'type': 'loss', 'content': 0.006383887492120266, 'timestamp': '2025-09-10 02:35:18.224426', 'step': 256, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:35:18.276965', 'step': 256, 'epoch': 1} +{'type': 'loss', 'content': 0.01735054701566696, 'timestamp': '2025-09-10 02:35:18.280039', 'step': 257, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:35:18.333173', 'step': 257, 'epoch': 1} +{'type': 'loss', 'content': 0.010717264376580715, 'timestamp': '2025-09-10 02:35:18.335265', 'step': 258, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:35:18.388014', 'step': 258, 'epoch': 1} +{'type': 'loss', 'content': 0.008735094219446182, 'timestamp': '2025-09-10 02:35:18.389998', 'step': 259, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:35:18.442698', 'step': 259, 'epoch': 1} +{'type': 'loss', 'content': 0.02538483962416649, 'timestamp': '2025-09-10 02:35:18.448600', 'step': 260, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:35:18.505052', 'step': 260, 'epoch': 1} +{'type': 'loss', 'content': 0.020319253206253052, 'timestamp': '2025-09-10 02:35:18.516210', 'step': 261, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:35:18.569134', 'step': 261, 'epoch': 1} +{'type': 'loss', 'content': 0.05068264529109001, 'timestamp': '2025-09-10 02:35:18.571195', 'step': 262, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:35:18.624077', 'step': 262, 'epoch': 1} +{'type': 'loss', 'content': 0.019700633361935616, 'timestamp': '2025-09-10 02:35:18.626304', 'step': 263, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:35:18.679409', 'step': 263, 'epoch': 1} +{'type': 'loss', 'content': 0.03039342351257801, 'timestamp': '2025-09-10 02:35:18.688261', 'step': 264, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:35:18.740116', 'step': 264, 'epoch': 1} +{'type': 'loss', 'content': 0.016535136848688126, 'timestamp': '2025-09-10 02:35:18.743100', 'step': 265, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:35:18.800715', 'step': 265, 'epoch': 1} +{'type': 'loss', 'content': 0.04237798973917961, 'timestamp': '2025-09-10 02:35:18.811169', 'step': 266, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:35:18.864103', 'step': 266, 'epoch': 1} +{'type': 'loss', 'content': 0.018209638074040413, 'timestamp': '2025-09-10 02:35:18.867165', 'step': 267, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:35:18.919117', 'step': 267, 'epoch': 1} +{'type': 'loss', 'content': 0.015615333802998066, 'timestamp': '2025-09-10 02:35:18.924899', 'step': 268, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:35:18.976888', 'step': 268, 'epoch': 1} +{'type': 'loss', 'content': 0.025077302008867264, 'timestamp': '2025-09-10 02:35:18.978983', 'step': 269, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:35:19.033182', 'step': 269, 'epoch': 1} +{'type': 'loss', 'content': 0.007397750858217478, 'timestamp': '2025-09-10 02:35:19.042956', 'step': 270, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:35:19.096719', 'step': 270, 'epoch': 1} +{'type': 'loss', 'content': 0.007934799417853355, 'timestamp': '2025-09-10 02:35:19.099061', 'step': 271, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:35:19.152208', 'step': 271, 'epoch': 1} +{'type': 'loss', 'content': 0.029624303802847862, 'timestamp': '2025-09-10 02:35:19.158490', 'step': 272, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:35:19.211762', 'step': 272, 'epoch': 1} +{'type': 'loss', 'content': 0.017695227637887, 'timestamp': '2025-09-10 02:35:19.213927', 'step': 273, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:35:19.266325', 'step': 273, 'epoch': 1} +{'type': 'loss', 'content': 0.01380168367177248, 'timestamp': '2025-09-10 02:35:19.268474', 'step': 274, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:35:19.320542', 'step': 274, 'epoch': 1} +{'type': 'loss', 'content': 0.007517028599977493, 'timestamp': '2025-09-10 02:35:19.322446', 'step': 275, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:35:19.375231', 'step': 275, 'epoch': 1} +{'type': 'loss', 'content': 0.009313727729022503, 'timestamp': '2025-09-10 02:35:19.380909', 'step': 276, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:35:19.439432', 'step': 276, 'epoch': 1} +{'type': 'loss', 'content': 0.02604990266263485, 'timestamp': '2025-09-10 02:35:19.451027', 'step': 277, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:35:19.503948', 'step': 277, 'epoch': 1} +{'type': 'loss', 'content': 0.013054589740931988, 'timestamp': '2025-09-10 02:35:19.506033', 'step': 278, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:35:19.558328', 'step': 278, 'epoch': 1} +{'type': 'loss', 'content': 0.011929529719054699, 'timestamp': '2025-09-10 02:35:19.560365', 'step': 279, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:35:19.612748', 'step': 279, 'epoch': 1} +{'type': 'loss', 'content': 0.009011547081172466, 'timestamp': '2025-09-10 02:35:19.618610', 'step': 280, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:35:19.670507', 'step': 280, 'epoch': 1} +{'type': 'loss', 'content': 0.02468419075012207, 'timestamp': '2025-09-10 02:35:19.680780', 'step': 281, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:35:19.733387', 'step': 281, 'epoch': 1} +{'type': 'loss', 'content': 0.00777811324223876, 'timestamp': '2025-09-10 02:35:19.735557', 'step': 282, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:35:19.787907', 'step': 282, 'epoch': 1} +{'type': 'loss', 'content': 0.030644703656435013, 'timestamp': '2025-09-10 02:35:19.790025', 'step': 283, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:35:19.842598', 'step': 283, 'epoch': 1} +{'type': 'loss', 'content': 0.02055281400680542, 'timestamp': '2025-09-10 02:35:19.848176', 'step': 284, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:35:19.900113', 'step': 284, 'epoch': 1} +{'type': 'loss', 'content': 0.02825690247118473, 'timestamp': '2025-09-10 02:35:19.902183', 'step': 285, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:35:19.954681', 'step': 285, 'epoch': 1} +{'type': 'loss', 'content': 0.014599240384995937, 'timestamp': '2025-09-10 02:35:19.956946', 'step': 286, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:35:20.009750', 'step': 286, 'epoch': 1} +{'type': 'loss', 'content': 0.009029184468090534, 'timestamp': '2025-09-10 02:35:20.012246', 'step': 287, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:35:20.069617', 'step': 287, 'epoch': 1} +{'type': 'loss', 'content': 0.009579007513821125, 'timestamp': '2025-09-10 02:35:20.088602', 'step': 288, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:35:20.149928', 'step': 288, 'epoch': 1} +{'type': 'loss', 'content': 0.028099318966269493, 'timestamp': '2025-09-10 02:35:20.156004', 'step': 289, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:35:20.222737', 'step': 289, 'epoch': 1} +{'type': 'loss', 'content': 0.0020475992932915688, 'timestamp': '2025-09-10 02:35:20.225451', 'step': 290, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:35:20.287218', 'step': 290, 'epoch': 1} +{'type': 'loss', 'content': 0.009952199645340443, 'timestamp': '2025-09-10 02:35:20.296766', 'step': 291, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:35:20.359226', 'step': 291, 'epoch': 1} +{'type': 'loss', 'content': 0.0032330357935279608, 'timestamp': '2025-09-10 02:35:20.366041', 'step': 292, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:35:20.434462', 'step': 292, 'epoch': 1} +{'type': 'loss', 'content': 0.01548854261636734, 'timestamp': '2025-09-10 02:35:20.440080', 'step': 293, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:35:20.510435', 'step': 293, 'epoch': 1} +{'type': 'loss', 'content': 0.012301183305680752, 'timestamp': '2025-09-10 02:35:20.512982', 'step': 294, 'epoch': 1} +{'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 736], 'batch_size': 8, 'flops': 14691612894976}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10859018244352}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 624], 'batch_size': 8, 'flops': 12455932682112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11497784019456}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8623338031488}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8623338031488}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10859018244352}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 656], 'batch_size': 8, 'flops': 13094698457216}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 560], 'batch_size': 8, 'flops': 11178401131904}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11497784019456}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 1168], 'batch_size': 8, 'flops': 23314950858880}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 640], 'batch_size': 8, 'flops': 12775315569664}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [6, 208], 'batch_size': 8, 'flops': 4151977605760}], 'timestamp': '2025-09-10 02:35:37.283257', 'step': 294, 'epoch': 1} +{'type': 'pplx', 'content': 18750520.76587751, 'timestamp': '2025-09-10 02:35:37.286063', 'step': 294, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:35:37.345243', 'step': 294, 'epoch': 1} +{'type': 'loss', 'content': 0.018759505823254585, 'timestamp': '2025-09-10 02:35:37.355905', 'step': 295, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-09-10 02:35:37.417674', 'step': 295, 'epoch': 1} +{'type': 'loss', 'content': 0.015110853128135204, 'timestamp': '2025-09-10 02:35:37.429566', 'step': 296, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:35:37.482442', 'step': 296, 'epoch': 1} +{'type': 'loss', 'content': 0.014034484513103962, 'timestamp': '2025-09-10 02:35:37.484570', 'step': 297, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:35:37.537534', 'step': 297, 'epoch': 1} +{'type': 'loss', 'content': 0.008410094305872917, 'timestamp': '2025-09-10 02:35:37.539580', 'step': 298, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-09-10 02:35:37.605684', 'step': 298, 'epoch': 1} +{'type': 'loss', 'content': 0.012407462112605572, 'timestamp': '2025-09-10 02:35:37.617909', 'step': 299, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:35:37.670483', 'step': 299, 'epoch': 1} +{'type': 'loss', 'content': 0.03873913362622261, 'timestamp': '2025-09-10 02:35:37.676484', 'step': 300, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:35:37.728626', 'step': 300, 'epoch': 1} +{'type': 'loss', 'content': 0.013834113255143166, 'timestamp': '2025-09-10 02:35:37.730541', 'step': 301, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:35:37.783932', 'step': 301, 'epoch': 1} +{'type': 'loss', 'content': 0.03874850645661354, 'timestamp': '2025-09-10 02:35:37.789928', 'step': 302, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:35:37.844381', 'step': 302, 'epoch': 1} +{'type': 'loss', 'content': 0.04323044791817665, 'timestamp': '2025-09-10 02:35:37.849821', 'step': 303, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:35:37.904337', 'step': 303, 'epoch': 1} +{'type': 'loss', 'content': 0.01321121584624052, 'timestamp': '2025-09-10 02:35:37.910701', 'step': 304, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:35:37.964091', 'step': 304, 'epoch': 1} +{'type': 'loss', 'content': 0.01439062599092722, 'timestamp': '2025-09-10 02:35:37.969949', 'step': 305, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:35:38.023377', 'step': 305, 'epoch': 1} +{'type': 'loss', 'content': 0.01884712092578411, 'timestamp': '2025-09-10 02:35:38.030853', 'step': 306, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:35:38.085130', 'step': 306, 'epoch': 1} +{'type': 'loss', 'content': 0.01008074451237917, 'timestamp': '2025-09-10 02:35:38.094951', 'step': 307, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:35:38.148115', 'step': 307, 'epoch': 1} +{'type': 'loss', 'content': 0.01976635865867138, 'timestamp': '2025-09-10 02:35:38.154101', 'step': 308, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:35:38.206363', 'step': 308, 'epoch': 1} +{'type': 'loss', 'content': 0.015738816931843758, 'timestamp': '2025-09-10 02:35:38.208593', 'step': 309, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:35:38.261153', 'step': 309, 'epoch': 1} +{'type': 'loss', 'content': 0.006744579412043095, 'timestamp': '2025-09-10 02:35:38.263257', 'step': 310, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:35:38.315786', 'step': 310, 'epoch': 1} +{'type': 'loss', 'content': 0.008818583562970161, 'timestamp': '2025-09-10 02:35:38.318142', 'step': 311, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-09-10 02:35:38.379558', 'step': 311, 'epoch': 1} +{'type': 'loss', 'content': 0.008585556410253048, 'timestamp': '2025-09-10 02:35:38.391424', 'step': 312, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:35:38.450215', 'step': 312, 'epoch': 1} +{'type': 'loss', 'content': 0.022689757868647575, 'timestamp': '2025-09-10 02:35:38.461806', 'step': 313, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:35:38.514991', 'step': 313, 'epoch': 1} +{'type': 'loss', 'content': 0.018085205927491188, 'timestamp': '2025-09-10 02:35:38.524618', 'step': 314, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:35:38.578333', 'step': 314, 'epoch': 1} +{'type': 'loss', 'content': 0.025187838822603226, 'timestamp': '2025-09-10 02:35:38.580440', 'step': 315, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:35:38.643387', 'step': 315, 'epoch': 1} +{'type': 'loss', 'content': 0.021669624373316765, 'timestamp': '2025-09-10 02:35:38.649529', 'step': 316, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:35:38.704464', 'step': 316, 'epoch': 1} +{'type': 'loss', 'content': 0.018435750156641006, 'timestamp': '2025-09-10 02:35:38.706739', 'step': 317, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:35:38.760944', 'step': 317, 'epoch': 1} +{'type': 'loss', 'content': 0.025637488812208176, 'timestamp': '2025-09-10 02:35:38.767968', 'step': 318, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:35:38.821051', 'step': 318, 'epoch': 1} +{'type': 'loss', 'content': 0.017072511836886406, 'timestamp': '2025-09-10 02:35:38.823215', 'step': 319, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:35:38.876069', 'step': 319, 'epoch': 1} +{'type': 'loss', 'content': 0.01036195270717144, 'timestamp': '2025-09-10 02:35:38.883407', 'step': 320, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-09-10 02:35:38.948276', 'step': 320, 'epoch': 1} +{'type': 'loss', 'content': 0.012251333333551884, 'timestamp': '2025-09-10 02:35:38.961481', 'step': 321, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:35:39.013802', 'step': 321, 'epoch': 1} +{'type': 'loss', 'content': 0.013256451115012169, 'timestamp': '2025-09-10 02:35:39.016809', 'step': 322, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:35:39.071579', 'step': 322, 'epoch': 1} +{'type': 'loss', 'content': 0.017366288229823112, 'timestamp': '2025-09-10 02:35:39.081373', 'step': 323, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:35:39.134874', 'step': 323, 'epoch': 1} +{'type': 'loss', 'content': 0.019963664934039116, 'timestamp': '2025-09-10 02:35:39.141251', 'step': 324, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:35:39.193938', 'step': 324, 'epoch': 1} +{'type': 'loss', 'content': 0.029277343302965164, 'timestamp': '2025-09-10 02:35:39.196312', 'step': 325, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:35:39.249302', 'step': 325, 'epoch': 1} +{'type': 'loss', 'content': 0.006462120451033115, 'timestamp': '2025-09-10 02:35:39.251365', 'step': 326, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:35:39.304623', 'step': 326, 'epoch': 1} +{'type': 'loss', 'content': 0.021544145420193672, 'timestamp': '2025-09-10 02:35:39.306956', 'step': 327, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:35:39.359398', 'step': 327, 'epoch': 1} +{'type': 'loss', 'content': 0.024486379697918892, 'timestamp': '2025-09-10 02:35:39.366690', 'step': 328, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:35:39.418967', 'step': 328, 'epoch': 1} +{'type': 'loss', 'content': 0.026301609352231026, 'timestamp': '2025-09-10 02:35:39.420887', 'step': 329, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:35:39.473380', 'step': 329, 'epoch': 1} +{'type': 'loss', 'content': 0.028725922107696533, 'timestamp': '2025-09-10 02:35:39.475376', 'step': 330, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:35:39.528704', 'step': 330, 'epoch': 1} +{'type': 'loss', 'content': 0.023373592644929886, 'timestamp': '2025-09-10 02:35:39.530647', 'step': 331, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:35:39.584100', 'step': 331, 'epoch': 1} +{'type': 'loss', 'content': 0.028214115649461746, 'timestamp': '2025-09-10 02:35:39.590856', 'step': 332, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:35:39.643890', 'step': 332, 'epoch': 1} +{'type': 'loss', 'content': 0.03408941254019737, 'timestamp': '2025-09-10 02:35:39.646652', 'step': 333, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:35:39.699175', 'step': 333, 'epoch': 1} +{'type': 'loss', 'content': 0.013487817719578743, 'timestamp': '2025-09-10 02:35:39.701330', 'step': 334, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:35:39.754014', 'step': 334, 'epoch': 1} +{'type': 'loss', 'content': 0.02832706831395626, 'timestamp': '2025-09-10 02:35:39.756016', 'step': 335, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:35:39.808984', 'step': 335, 'epoch': 1} +{'type': 'loss', 'content': 0.0026079160161316395, 'timestamp': '2025-09-10 02:35:39.815386', 'step': 336, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:35:39.869596', 'step': 336, 'epoch': 1} +{'type': 'loss', 'content': 0.008859396912157536, 'timestamp': '2025-09-10 02:35:39.871804', 'step': 337, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:35:39.926530', 'step': 337, 'epoch': 1} +{'type': 'loss', 'content': 0.00938789639621973, 'timestamp': '2025-09-10 02:35:39.928794', 'step': 338, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:35:39.981900', 'step': 338, 'epoch': 1} +{'type': 'loss', 'content': 0.006473448593169451, 'timestamp': '2025-09-10 02:35:39.984056', 'step': 339, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:35:40.037055', 'step': 339, 'epoch': 1} +{'type': 'loss', 'content': 0.020039811730384827, 'timestamp': '2025-09-10 02:35:40.043350', 'step': 340, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:35:40.095703', 'step': 340, 'epoch': 1} +{'type': 'loss', 'content': 0.02735760621726513, 'timestamp': '2025-09-10 02:35:40.101820', 'step': 341, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:35:40.155176', 'step': 341, 'epoch': 1} +{'type': 'loss', 'content': 0.01631229557096958, 'timestamp': '2025-09-10 02:35:40.157361', 'step': 342, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:35:40.210890', 'step': 342, 'epoch': 1} +{'type': 'loss', 'content': 0.003340591909363866, 'timestamp': '2025-09-10 02:35:40.213161', 'step': 343, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:35:40.266218', 'step': 343, 'epoch': 1} +{'type': 'loss', 'content': 0.012631870806217194, 'timestamp': '2025-09-10 02:35:40.272465', 'step': 344, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:35:40.324789', 'step': 344, 'epoch': 1} +{'type': 'loss', 'content': 0.013385455124080181, 'timestamp': '2025-09-10 02:35:40.334446', 'step': 345, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:35:40.387958', 'step': 345, 'epoch': 1} +{'type': 'loss', 'content': 0.024300584569573402, 'timestamp': '2025-09-10 02:35:40.390000', 'step': 346, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:35:40.444166', 'step': 346, 'epoch': 1} +{'type': 'loss', 'content': 0.011237295344471931, 'timestamp': '2025-09-10 02:35:40.450478', 'step': 347, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:35:40.505473', 'step': 347, 'epoch': 1} +{'type': 'loss', 'content': 0.01617843471467495, 'timestamp': '2025-09-10 02:35:40.511792', 'step': 348, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:35:40.565897', 'step': 348, 'epoch': 1} +{'type': 'loss', 'content': 0.023241808637976646, 'timestamp': '2025-09-10 02:35:40.567873', 'step': 349, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:35:40.620972', 'step': 349, 'epoch': 1} +{'type': 'loss', 'content': 0.010824748314917088, 'timestamp': '2025-09-10 02:35:40.623080', 'step': 350, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:35:40.677188', 'step': 350, 'epoch': 1} +{'type': 'loss', 'content': 0.02128290943801403, 'timestamp': '2025-09-10 02:35:40.684603', 'step': 351, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:35:40.737193', 'step': 351, 'epoch': 1} +{'type': 'loss', 'content': 0.003326027188450098, 'timestamp': '2025-09-10 02:35:40.743257', 'step': 352, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:35:40.795034', 'step': 352, 'epoch': 1} +{'type': 'loss', 'content': 0.022350847721099854, 'timestamp': '2025-09-10 02:35:40.796829', 'step': 353, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:35:40.848795', 'step': 353, 'epoch': 1} +{'type': 'loss', 'content': 0.01989401876926422, 'timestamp': '2025-09-10 02:35:40.851724', 'step': 354, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:35:40.904797', 'step': 354, 'epoch': 1} +{'type': 'loss', 'content': 0.008281980641186237, 'timestamp': '2025-09-10 02:35:40.914432', 'step': 355, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:35:40.973777', 'step': 355, 'epoch': 1} +{'type': 'loss', 'content': 0.012412874028086662, 'timestamp': '2025-09-10 02:35:40.985021', 'step': 356, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:35:41.043620', 'step': 356, 'epoch': 1} +{'type': 'loss', 'content': 0.004053934942930937, 'timestamp': '2025-09-10 02:35:41.051651', 'step': 357, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 656], 'flops': 13120079713856.0}, 'timestamp': '2025-09-10 02:35:41.151175', 'step': 357, 'epoch': 1} +{'type': 'loss', 'content': 0.039582908153533936, 'timestamp': '2025-09-10 02:35:41.169816', 'step': 358, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:35:41.222766', 'step': 358, 'epoch': 1} +{'type': 'loss', 'content': 0.004859428387135267, 'timestamp': '2025-09-10 02:35:41.224646', 'step': 359, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:35:41.299104', 'step': 359, 'epoch': 1} +{'type': 'loss', 'content': 0.007610120810568333, 'timestamp': '2025-09-10 02:35:41.310546', 'step': 360, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:35:41.364004', 'step': 360, 'epoch': 1} +{'type': 'loss', 'content': 0.004768159706145525, 'timestamp': '2025-09-10 02:35:41.366068', 'step': 361, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:35:41.418920', 'step': 361, 'epoch': 1} +{'type': 'loss', 'content': 0.018082713708281517, 'timestamp': '2025-09-10 02:35:41.421175', 'step': 362, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:35:41.474265', 'step': 362, 'epoch': 1} +{'type': 'loss', 'content': 0.021560117602348328, 'timestamp': '2025-09-10 02:35:41.476372', 'step': 363, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:35:41.530469', 'step': 363, 'epoch': 1} +{'type': 'loss', 'content': 0.006435369607061148, 'timestamp': '2025-09-10 02:35:41.541155', 'step': 364, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:35:41.596482', 'step': 364, 'epoch': 1} +{'type': 'loss', 'content': 0.015123516321182251, 'timestamp': '2025-09-10 02:35:41.598288', 'step': 365, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:35:41.650845', 'step': 365, 'epoch': 1} +{'type': 'loss', 'content': 0.005298522301018238, 'timestamp': '2025-09-10 02:35:41.653846', 'step': 366, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:35:41.706285', 'step': 366, 'epoch': 1} +{'type': 'loss', 'content': 0.012734637595713139, 'timestamp': '2025-09-10 02:35:41.708311', 'step': 367, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:35:41.761064', 'step': 367, 'epoch': 1} +{'type': 'loss', 'content': 0.021002424880862236, 'timestamp': '2025-09-10 02:35:41.766854', 'step': 368, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:35:41.818974', 'step': 368, 'epoch': 1} +{'type': 'loss', 'content': 0.016581466421484947, 'timestamp': '2025-09-10 02:35:41.820998', 'step': 369, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:35:41.873811', 'step': 369, 'epoch': 1} +{'type': 'loss', 'content': 0.0038347463123500347, 'timestamp': '2025-09-10 02:35:41.875842', 'step': 370, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:35:41.929116', 'step': 370, 'epoch': 1} +{'type': 'loss', 'content': 0.0073821512050926685, 'timestamp': '2025-09-10 02:35:41.931457', 'step': 371, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:35:41.984624', 'step': 371, 'epoch': 1} +{'type': 'loss', 'content': 0.008205964230000973, 'timestamp': '2025-09-10 02:35:41.993341', 'step': 372, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:35:42.045445', 'step': 372, 'epoch': 1} +{'type': 'loss', 'content': 0.0025237463414669037, 'timestamp': '2025-09-10 02:35:42.047508', 'step': 373, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:35:42.100503', 'step': 373, 'epoch': 1} +{'type': 'loss', 'content': 0.040251873433589935, 'timestamp': '2025-09-10 02:35:42.102448', 'step': 374, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:35:42.155237', 'step': 374, 'epoch': 1} +{'type': 'loss', 'content': 0.004792653955519199, 'timestamp': '2025-09-10 02:35:42.157380', 'step': 375, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:35:42.211929', 'step': 375, 'epoch': 1} +{'type': 'loss', 'content': 0.02295234613120556, 'timestamp': '2025-09-10 02:35:42.222528', 'step': 376, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 512], 'flops': 10240062230528.0}, 'timestamp': '2025-09-10 02:35:42.295572', 'step': 376, 'epoch': 1} +{'type': 'loss', 'content': 0.015930820256471634, 'timestamp': '2025-09-10 02:35:42.310942', 'step': 377, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:35:42.363712', 'step': 377, 'epoch': 1} +{'type': 'loss', 'content': 0.034202445298433304, 'timestamp': '2025-09-10 02:35:42.365655', 'step': 378, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:35:42.417788', 'step': 378, 'epoch': 1} +{'type': 'loss', 'content': 0.03493393212556839, 'timestamp': '2025-09-10 02:35:42.419694', 'step': 379, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:35:42.472219', 'step': 379, 'epoch': 1} +{'type': 'loss', 'content': 0.020121442154049873, 'timestamp': '2025-09-10 02:35:42.477945', 'step': 380, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:35:42.530060', 'step': 380, 'epoch': 1} +{'type': 'loss', 'content': 0.007878902368247509, 'timestamp': '2025-09-10 02:35:42.532168', 'step': 381, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:35:42.584357', 'step': 381, 'epoch': 1} +{'type': 'loss', 'content': 0.0033871701452881098, 'timestamp': '2025-09-10 02:35:42.586494', 'step': 382, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:35:42.639044', 'step': 382, 'epoch': 1} +{'type': 'loss', 'content': 0.012487685307860374, 'timestamp': '2025-09-10 02:35:42.641094', 'step': 383, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:35:42.693622', 'step': 383, 'epoch': 1} +{'type': 'loss', 'content': 0.007192258723080158, 'timestamp': '2025-09-10 02:35:42.701197', 'step': 384, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:35:42.757458', 'step': 384, 'epoch': 1} +{'type': 'loss', 'content': 0.02217181958258152, 'timestamp': '2025-09-10 02:35:42.768815', 'step': 385, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:35:42.821616', 'step': 385, 'epoch': 1} +{'type': 'loss', 'content': 0.03604666516184807, 'timestamp': '2025-09-10 02:35:42.823513', 'step': 386, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:35:42.877126', 'step': 386, 'epoch': 1} +{'type': 'loss', 'content': 0.008639396168291569, 'timestamp': '2025-09-10 02:35:42.879176', 'step': 387, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:35:42.939533', 'step': 387, 'epoch': 1} +{'type': 'loss', 'content': 0.016208883374929428, 'timestamp': '2025-09-10 02:35:42.951008', 'step': 388, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:35:43.003996', 'step': 388, 'epoch': 1} +{'type': 'loss', 'content': 0.012816510163247585, 'timestamp': '2025-09-10 02:35:43.006275', 'step': 389, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:35:43.059689', 'step': 389, 'epoch': 1} +{'type': 'loss', 'content': 0.013577081263065338, 'timestamp': '2025-09-10 02:35:43.061776', 'step': 390, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:35:43.114527', 'step': 390, 'epoch': 1} +{'type': 'loss', 'content': 0.03696885332465172, 'timestamp': '2025-09-10 02:35:43.122891', 'step': 391, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:35:43.175727', 'step': 391, 'epoch': 1} +{'type': 'loss', 'content': 0.026915784925222397, 'timestamp': '2025-09-10 02:35:43.181231', 'step': 392, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:35:43.233258', 'step': 392, 'epoch': 1} +{'type': 'loss', 'content': 0.02463557943701744, 'timestamp': '2025-09-10 02:35:43.239965', 'step': 393, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:35:43.292812', 'step': 393, 'epoch': 1} +{'type': 'loss', 'content': 0.020221231505274773, 'timestamp': '2025-09-10 02:35:43.299090', 'step': 394, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:35:43.352087', 'step': 394, 'epoch': 1} +{'type': 'loss', 'content': 0.013852830044925213, 'timestamp': '2025-09-10 02:35:43.354280', 'step': 395, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:35:43.408539', 'step': 395, 'epoch': 1} +{'type': 'loss', 'content': 0.01046758983284235, 'timestamp': '2025-09-10 02:35:43.419132', 'step': 396, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:35:43.471354', 'step': 396, 'epoch': 1} +{'type': 'loss', 'content': 0.007396661676466465, 'timestamp': '2025-09-10 02:35:43.473435', 'step': 397, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:35:43.526422', 'step': 397, 'epoch': 1} +{'type': 'loss', 'content': 0.0032712691463530064, 'timestamp': '2025-09-10 02:35:43.528379', 'step': 398, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:35:43.581736', 'step': 398, 'epoch': 1} +{'type': 'loss', 'content': 0.04277123883366585, 'timestamp': '2025-09-10 02:35:43.583408', 'step': 399, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:35:43.636410', 'step': 399, 'epoch': 1} +{'type': 'loss', 'content': 0.016597582027316093, 'timestamp': '2025-09-10 02:35:43.642256', 'step': 400, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:35:43.694545', 'step': 400, 'epoch': 1} +{'type': 'loss', 'content': 0.027218716219067574, 'timestamp': '2025-09-10 02:35:43.696268', 'step': 401, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:35:43.748806', 'step': 401, 'epoch': 1} +{'type': 'loss', 'content': 0.0064104353077709675, 'timestamp': '2025-09-10 02:35:43.754927', 'step': 402, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:35:43.809487', 'step': 402, 'epoch': 1} +{'type': 'loss', 'content': 0.016035081818699837, 'timestamp': '2025-09-10 02:35:43.819312', 'step': 403, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:35:43.871950', 'step': 403, 'epoch': 1} +{'type': 'loss', 'content': 0.005248712841421366, 'timestamp': '2025-09-10 02:35:43.877919', 'step': 404, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:35:43.929769', 'step': 404, 'epoch': 1} +{'type': 'loss', 'content': 0.019017385318875313, 'timestamp': '2025-09-10 02:35:43.931887', 'step': 405, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:35:43.984046', 'step': 405, 'epoch': 1} +{'type': 'loss', 'content': 0.02356548048555851, 'timestamp': '2025-09-10 02:35:43.985842', 'step': 406, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:35:44.038349', 'step': 406, 'epoch': 1} +{'type': 'loss', 'content': 0.014385750517249107, 'timestamp': '2025-09-10 02:35:44.040169', 'step': 407, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:35:44.092877', 'step': 407, 'epoch': 1} +{'type': 'loss', 'content': 0.008769115433096886, 'timestamp': '2025-09-10 02:35:44.103237', 'step': 408, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:35:44.156294', 'step': 408, 'epoch': 1} +{'type': 'loss', 'content': 0.016957219690084457, 'timestamp': '2025-09-10 02:35:44.166797', 'step': 409, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:35:44.219772', 'step': 409, 'epoch': 1} +{'type': 'loss', 'content': 0.025768084451556206, 'timestamp': '2025-09-10 02:35:44.221903', 'step': 410, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:35:44.274877', 'step': 410, 'epoch': 1} +{'type': 'loss', 'content': 0.026145832613110542, 'timestamp': '2025-09-10 02:35:44.276915', 'step': 411, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:35:44.329781', 'step': 411, 'epoch': 1} +{'type': 'loss', 'content': 0.004151365719735622, 'timestamp': '2025-09-10 02:35:44.335581', 'step': 412, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:35:44.387685', 'step': 412, 'epoch': 1} +{'type': 'loss', 'content': 0.02256178855895996, 'timestamp': '2025-09-10 02:35:44.389478', 'step': 413, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:35:44.441766', 'step': 413, 'epoch': 1} +{'type': 'loss', 'content': 0.036476414650678635, 'timestamp': '2025-09-10 02:35:44.443503', 'step': 414, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:35:44.495839', 'step': 414, 'epoch': 1} +{'type': 'loss', 'content': 0.022130006924271584, 'timestamp': '2025-09-10 02:35:44.498068', 'step': 415, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:35:44.551011', 'step': 415, 'epoch': 1} +{'type': 'loss', 'content': 0.004478184040635824, 'timestamp': '2025-09-10 02:35:44.558203', 'step': 416, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:35:44.610489', 'step': 416, 'epoch': 1} +{'type': 'loss', 'content': 0.02027822472155094, 'timestamp': '2025-09-10 02:35:44.617030', 'step': 417, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:35:44.670401', 'step': 417, 'epoch': 1} +{'type': 'loss', 'content': 0.014351091347634792, 'timestamp': '2025-09-10 02:35:44.672316', 'step': 418, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:35:44.725370', 'step': 418, 'epoch': 1} +{'type': 'loss', 'content': 0.02787570282816887, 'timestamp': '2025-09-10 02:35:44.731775', 'step': 419, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:35:44.784483', 'step': 419, 'epoch': 1} +{'type': 'loss', 'content': 0.04726407676935196, 'timestamp': '2025-09-10 02:35:44.791733', 'step': 420, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:35:44.845738', 'step': 420, 'epoch': 1} +{'type': 'loss', 'content': 0.04083619639277458, 'timestamp': '2025-09-10 02:35:44.853656', 'step': 421, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:35:44.906705', 'step': 421, 'epoch': 1} +{'type': 'loss', 'content': 0.030161907896399498, 'timestamp': '2025-09-10 02:35:44.914478', 'step': 422, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:35:44.966801', 'step': 422, 'epoch': 1} +{'type': 'loss', 'content': 0.01481288019567728, 'timestamp': '2025-09-10 02:35:44.973110', 'step': 423, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:35:45.026070', 'step': 423, 'epoch': 1} +{'type': 'loss', 'content': 0.021902069449424744, 'timestamp': '2025-09-10 02:35:45.032079', 'step': 424, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:35:45.084546', 'step': 424, 'epoch': 1} +{'type': 'loss', 'content': 0.007905379869043827, 'timestamp': '2025-09-10 02:35:45.086442', 'step': 425, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:35:45.138902', 'step': 425, 'epoch': 1} +{'type': 'loss', 'content': 0.010098838247358799, 'timestamp': '2025-09-10 02:35:45.140873', 'step': 426, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:35:45.193897', 'step': 426, 'epoch': 1} +{'type': 'loss', 'content': 0.018967723473906517, 'timestamp': '2025-09-10 02:35:45.195895', 'step': 427, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:35:45.248495', 'step': 427, 'epoch': 1} +{'type': 'loss', 'content': 0.01284452062100172, 'timestamp': '2025-09-10 02:35:45.255811', 'step': 428, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:35:45.308398', 'step': 428, 'epoch': 1} +{'type': 'loss', 'content': 0.004694396164268255, 'timestamp': '2025-09-10 02:35:45.316389', 'step': 429, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:35:45.370039', 'step': 429, 'epoch': 1} +{'type': 'loss', 'content': 0.007329125888645649, 'timestamp': '2025-09-10 02:35:45.376338', 'step': 430, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:35:45.429364', 'step': 430, 'epoch': 1} +{'type': 'loss', 'content': 0.015766726806759834, 'timestamp': '2025-09-10 02:35:45.438932', 'step': 431, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:35:45.506016', 'step': 431, 'epoch': 1} +{'type': 'loss', 'content': 0.015795400366187096, 'timestamp': '2025-09-10 02:35:45.511888', 'step': 432, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:35:45.564028', 'step': 432, 'epoch': 1} +{'type': 'loss', 'content': 0.033254317939281464, 'timestamp': '2025-09-10 02:35:45.566005', 'step': 433, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:35:45.618375', 'step': 433, 'epoch': 1} +{'type': 'loss', 'content': 0.03296815976500511, 'timestamp': '2025-09-10 02:35:45.620487', 'step': 434, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:35:45.673569', 'step': 434, 'epoch': 1} +{'type': 'loss', 'content': 0.017036251723766327, 'timestamp': '2025-09-10 02:35:45.675552', 'step': 435, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:35:45.728315', 'step': 435, 'epoch': 1} +{'type': 'loss', 'content': 0.008534570224583149, 'timestamp': '2025-09-10 02:35:45.734456', 'step': 436, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:35:45.786602', 'step': 436, 'epoch': 1} +{'type': 'loss', 'content': 0.009411263279616833, 'timestamp': '2025-09-10 02:35:45.796414', 'step': 437, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:35:45.854581', 'step': 437, 'epoch': 1} +{'type': 'loss', 'content': 0.04341122880578041, 'timestamp': '2025-09-10 02:35:45.865025', 'step': 438, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:35:45.918936', 'step': 438, 'epoch': 1} +{'type': 'loss', 'content': 0.012458802200853825, 'timestamp': '2025-09-10 02:35:45.921157', 'step': 439, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:35:45.973888', 'step': 439, 'epoch': 1} +{'type': 'loss', 'content': 0.011517300270497799, 'timestamp': '2025-09-10 02:35:45.979872', 'step': 440, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:35:46.032914', 'step': 440, 'epoch': 1} +{'type': 'loss', 'content': 0.00763851311057806, 'timestamp': '2025-09-10 02:35:46.034840', 'step': 441, 'epoch': 1} +{'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 736], 'batch_size': 8, 'flops': 14691612894976}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10859018244352}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 624], 'batch_size': 8, 'flops': 12455932682112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11497784019456}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8623338031488}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8623338031488}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10859018244352}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 656], 'batch_size': 8, 'flops': 13094698457216}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 560], 'batch_size': 8, 'flops': 11178401131904}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11497784019456}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 1168], 'batch_size': 8, 'flops': 23314950858880}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 640], 'batch_size': 8, 'flops': 12775315569664}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [6, 208], 'batch_size': 8, 'flops': 4151977605760}], 'timestamp': '2025-09-10 02:36:02.719566', 'step': 441, 'epoch': 1} +{'type': 'pplx', 'content': 20480146.857568886, 'timestamp': '2025-09-10 02:36:02.722030', 'step': 441, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:36:02.776087', 'step': 441, 'epoch': 1} +{'type': 'loss', 'content': 0.0124233802780509, 'timestamp': '2025-09-10 02:36:02.778231', 'step': 442, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:36:02.831082', 'step': 442, 'epoch': 1} +{'type': 'loss', 'content': 0.020152656361460686, 'timestamp': '2025-09-10 02:36:02.833300', 'step': 443, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:36:02.885571', 'step': 443, 'epoch': 1} +{'type': 'loss', 'content': 0.0173350777477026, 'timestamp': '2025-09-10 02:36:02.892717', 'step': 444, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-09-10 02:36:02.952006', 'step': 444, 'epoch': 1} +{'type': 'loss', 'content': 0.02373490296304226, 'timestamp': '2025-09-10 02:36:02.964018', 'step': 445, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:36:03.017730', 'step': 445, 'epoch': 1} +{'type': 'loss', 'content': 0.02507105842232704, 'timestamp': '2025-09-10 02:36:03.021468', 'step': 446, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:36:03.078069', 'step': 446, 'epoch': 1} +{'type': 'loss', 'content': 0.019890591502189636, 'timestamp': '2025-09-10 02:36:03.080272', 'step': 447, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:36:03.134733', 'step': 447, 'epoch': 1} +{'type': 'loss', 'content': 0.00882682390511036, 'timestamp': '2025-09-10 02:36:03.145347', 'step': 448, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:36:03.197438', 'step': 448, 'epoch': 1} +{'type': 'loss', 'content': 0.022805573418736458, 'timestamp': '2025-09-10 02:36:03.205430', 'step': 449, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:36:03.267262', 'step': 449, 'epoch': 1} +{'type': 'loss', 'content': 0.014748402871191502, 'timestamp': '2025-09-10 02:36:03.271155', 'step': 450, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:36:03.323659', 'step': 450, 'epoch': 1} +{'type': 'loss', 'content': 0.015324982814490795, 'timestamp': '2025-09-10 02:36:03.326284', 'step': 451, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:36:03.378495', 'step': 451, 'epoch': 1} +{'type': 'loss', 'content': 0.016105040907859802, 'timestamp': '2025-09-10 02:36:03.385599', 'step': 452, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:36:03.440120', 'step': 452, 'epoch': 1} +{'type': 'loss', 'content': 0.016740145161747932, 'timestamp': '2025-09-10 02:36:03.442065', 'step': 453, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:36:03.495803', 'step': 453, 'epoch': 1} +{'type': 'loss', 'content': 0.01827307976782322, 'timestamp': '2025-09-10 02:36:03.505530', 'step': 454, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:36:03.558128', 'step': 454, 'epoch': 1} +{'type': 'loss', 'content': 0.016006184741854668, 'timestamp': '2025-09-10 02:36:03.564319', 'step': 455, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:36:03.617538', 'step': 455, 'epoch': 1} +{'type': 'loss', 'content': 0.004057266749441624, 'timestamp': '2025-09-10 02:36:03.626360', 'step': 456, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:36:03.677643', 'step': 456, 'epoch': 1} +{'type': 'loss', 'content': 0.011827317997813225, 'timestamp': '2025-09-10 02:36:03.680575', 'step': 457, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:36:03.732486', 'step': 457, 'epoch': 1} +{'type': 'loss', 'content': 0.011614165268838406, 'timestamp': '2025-09-10 02:36:03.734665', 'step': 458, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:36:03.786231', 'step': 458, 'epoch': 1} +{'type': 'loss', 'content': 0.00911747757345438, 'timestamp': '2025-09-10 02:36:03.789384', 'step': 459, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:36:03.841629', 'step': 459, 'epoch': 1} +{'type': 'loss', 'content': 0.013866490684449673, 'timestamp': '2025-09-10 02:36:03.847857', 'step': 460, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:36:03.899443', 'step': 460, 'epoch': 1} +{'type': 'loss', 'content': 0.01937706209719181, 'timestamp': '2025-09-10 02:36:03.901484', 'step': 461, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:36:03.952993', 'step': 461, 'epoch': 1} +{'type': 'loss', 'content': 0.011243008077144623, 'timestamp': '2025-09-10 02:36:03.955623', 'step': 462, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:36:04.013976', 'step': 462, 'epoch': 1} +{'type': 'loss', 'content': 0.02434748224914074, 'timestamp': '2025-09-10 02:36:04.024434', 'step': 463, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:36:04.077714', 'step': 463, 'epoch': 1} +{'type': 'loss', 'content': 0.01768920198082924, 'timestamp': '2025-09-10 02:36:04.088136', 'step': 464, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:36:04.143076', 'step': 464, 'epoch': 1} +{'type': 'loss', 'content': 0.02644895575940609, 'timestamp': '2025-09-10 02:36:04.145451', 'step': 465, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:36:04.197655', 'step': 465, 'epoch': 1} +{'type': 'loss', 'content': 0.005339294206351042, 'timestamp': '2025-09-10 02:36:04.199661', 'step': 466, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:36:04.251792', 'step': 466, 'epoch': 1} +{'type': 'loss', 'content': 0.017220327630639076, 'timestamp': '2025-09-10 02:36:04.254883', 'step': 467, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:36:04.308157', 'step': 467, 'epoch': 1} +{'type': 'loss', 'content': 0.01685364916920662, 'timestamp': '2025-09-10 02:36:04.316948', 'step': 468, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:36:04.368169', 'step': 468, 'epoch': 1} +{'type': 'loss', 'content': 0.023372534662485123, 'timestamp': '2025-09-10 02:36:04.374949', 'step': 469, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:36:04.427238', 'step': 469, 'epoch': 1} +{'type': 'loss', 'content': 0.029475973919034004, 'timestamp': '2025-09-10 02:36:04.429899', 'step': 470, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:36:04.500798', 'step': 470, 'epoch': 1} +{'type': 'loss', 'content': 0.02342565916478634, 'timestamp': '2025-09-10 02:36:04.507060', 'step': 471, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:36:04.559079', 'step': 471, 'epoch': 1} +{'type': 'loss', 'content': 0.008689814247190952, 'timestamp': '2025-09-10 02:36:04.565437', 'step': 472, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:36:04.617227', 'step': 472, 'epoch': 1} +{'type': 'loss', 'content': 0.01086872536689043, 'timestamp': '2025-09-10 02:36:04.619020', 'step': 473, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:36:04.670888', 'step': 473, 'epoch': 1} +{'type': 'loss', 'content': 0.02115866169333458, 'timestamp': '2025-09-10 02:36:04.672919', 'step': 474, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:36:04.725188', 'step': 474, 'epoch': 1} +{'type': 'loss', 'content': 0.038662366569042206, 'timestamp': '2025-09-10 02:36:04.727837', 'step': 475, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:36:04.779597', 'step': 475, 'epoch': 1} +{'type': 'loss', 'content': 0.018250806257128716, 'timestamp': '2025-09-10 02:36:04.785286', 'step': 476, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:36:04.839611', 'step': 476, 'epoch': 1} +{'type': 'loss', 'content': 0.014851778745651245, 'timestamp': '2025-09-10 02:36:04.841881', 'step': 477, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:36:04.895584', 'step': 477, 'epoch': 1} +{'type': 'loss', 'content': 0.02507244236767292, 'timestamp': '2025-09-10 02:36:04.897693', 'step': 478, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:36:04.949656', 'step': 478, 'epoch': 1} +{'type': 'loss', 'content': 0.0236313845962286, 'timestamp': '2025-09-10 02:36:04.956305', 'step': 479, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:36:05.018405', 'step': 479, 'epoch': 1} +{'type': 'loss', 'content': 0.006937531288713217, 'timestamp': '2025-09-10 02:36:05.024149', 'step': 480, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:36:05.081336', 'step': 480, 'epoch': 1} +{'type': 'loss', 'content': 0.011437739245593548, 'timestamp': '2025-09-10 02:36:05.084002', 'step': 481, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:36:05.136809', 'step': 481, 'epoch': 1} +{'type': 'loss', 'content': 0.01146687287837267, 'timestamp': '2025-09-10 02:36:05.140067', 'step': 482, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:36:05.192268', 'step': 482, 'epoch': 1} +{'type': 'loss', 'content': 0.013129880651831627, 'timestamp': '2025-09-10 02:36:05.194176', 'step': 483, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:36:05.246171', 'step': 483, 'epoch': 1} +{'type': 'loss', 'content': 0.009299784898757935, 'timestamp': '2025-09-10 02:36:05.253744', 'step': 484, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:36:05.305887', 'step': 484, 'epoch': 1} +{'type': 'loss', 'content': 0.028207819908857346, 'timestamp': '2025-09-10 02:36:05.308184', 'step': 485, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-09-10 02:36:05.374709', 'step': 485, 'epoch': 1} +{'type': 'loss', 'content': 0.026876013725996017, 'timestamp': '2025-09-10 02:36:05.386948', 'step': 486, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:36:05.439203', 'step': 486, 'epoch': 1} +{'type': 'loss', 'content': 0.005303249694406986, 'timestamp': '2025-09-10 02:36:05.441335', 'step': 487, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:36:05.492976', 'step': 487, 'epoch': 1} +{'type': 'loss', 'content': 0.034863557666540146, 'timestamp': '2025-09-10 02:36:05.498475', 'step': 488, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:36:05.549879', 'step': 488, 'epoch': 1} +{'type': 'loss', 'content': 0.02458783984184265, 'timestamp': '2025-09-10 02:36:05.552952', 'step': 489, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-09-10 02:36:05.624990', 'step': 489, 'epoch': 1} +{'type': 'loss', 'content': 0.024885956197977066, 'timestamp': '2025-09-10 02:36:05.637706', 'step': 490, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:36:05.690236', 'step': 490, 'epoch': 1} +{'type': 'loss', 'content': 0.010751993395388126, 'timestamp': '2025-09-10 02:36:05.693442', 'step': 491, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:36:05.753504', 'step': 491, 'epoch': 1} +{'type': 'loss', 'content': 0.016897115856409073, 'timestamp': '2025-09-10 02:36:05.764999', 'step': 492, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:36:05.817078', 'step': 492, 'epoch': 1} +{'type': 'loss', 'content': 0.010212777182459831, 'timestamp': '2025-09-10 02:36:05.819738', 'step': 493, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:36:05.871976', 'step': 493, 'epoch': 1} +{'type': 'loss', 'content': 0.016341043636202812, 'timestamp': '2025-09-10 02:36:05.874186', 'step': 494, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:36:05.926778', 'step': 494, 'epoch': 1} +{'type': 'loss', 'content': 0.008837589994072914, 'timestamp': '2025-09-10 02:36:05.928859', 'step': 495, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:36:05.980904', 'step': 495, 'epoch': 1} +{'type': 'loss', 'content': 0.014222336933016777, 'timestamp': '2025-09-10 02:36:05.986576', 'step': 496, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:36:06.038893', 'step': 496, 'epoch': 1} +{'type': 'loss', 'content': 0.009091884829103947, 'timestamp': '2025-09-10 02:36:06.040871', 'step': 497, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-09-10 02:36:06.108021', 'step': 497, 'epoch': 1} +{'type': 'loss', 'content': 0.010018201544880867, 'timestamp': '2025-09-10 02:36:06.120621', 'step': 498, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:36:06.172366', 'step': 498, 'epoch': 1} +{'type': 'loss', 'content': 0.008286223746836185, 'timestamp': '2025-09-10 02:36:06.174904', 'step': 499, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:36:06.227178', 'step': 499, 'epoch': 1} +{'type': 'loss', 'content': 0.01953982003033161, 'timestamp': '2025-09-10 02:36:06.234408', 'step': 500, 'epoch': 1} +{'type': 'info', 'content': 'Checkpoint saved at step 500', 'timestamp': '2025-09-10 02:36:06.661874', 'step': 500, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:36:06.716314', 'step': 500, 'epoch': 1} +{'type': 'loss', 'content': 0.00831193383783102, 'timestamp': '2025-09-10 02:36:06.718332', 'step': 501, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:36:06.772160', 'step': 501, 'epoch': 1} +{'type': 'loss', 'content': 0.00853397324681282, 'timestamp': '2025-09-10 02:36:06.778052', 'step': 502, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:36:06.831020', 'step': 502, 'epoch': 1} +{'type': 'loss', 'content': 0.031167268753051758, 'timestamp': '2025-09-10 02:36:06.833561', 'step': 503, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:36:06.886327', 'step': 503, 'epoch': 1} +{'type': 'loss', 'content': 0.016558272764086723, 'timestamp': '2025-09-10 02:36:06.893870', 'step': 504, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-09-10 02:36:06.952637', 'step': 504, 'epoch': 1} +{'type': 'loss', 'content': 0.018046345561742783, 'timestamp': '2025-09-10 02:36:06.964395', 'step': 505, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:36:07.017104', 'step': 505, 'epoch': 1} +{'type': 'loss', 'content': 0.029076406732201576, 'timestamp': '2025-09-10 02:36:07.019594', 'step': 506, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:36:07.071474', 'step': 506, 'epoch': 1} +{'type': 'loss', 'content': 0.034537460654973984, 'timestamp': '2025-09-10 02:36:07.074495', 'step': 507, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:36:07.127200', 'step': 507, 'epoch': 1} +{'type': 'loss', 'content': 0.00975030567497015, 'timestamp': '2025-09-10 02:36:07.133154', 'step': 508, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:36:07.184883', 'step': 508, 'epoch': 1} +{'type': 'loss', 'content': 0.014805578626692295, 'timestamp': '2025-09-10 02:36:07.194905', 'step': 509, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:36:07.249070', 'step': 509, 'epoch': 1} +{'type': 'loss', 'content': 0.014143081381917, 'timestamp': '2025-09-10 02:36:07.258856', 'step': 510, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-09-10 02:36:07.320233', 'step': 510, 'epoch': 1} +{'type': 'loss', 'content': 0.010913644917309284, 'timestamp': '2025-09-10 02:36:07.331138', 'step': 511, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:36:07.384533', 'step': 511, 'epoch': 1} +{'type': 'loss', 'content': 0.029624953866004944, 'timestamp': '2025-09-10 02:36:07.391750', 'step': 512, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:36:07.445189', 'step': 512, 'epoch': 1} +{'type': 'loss', 'content': 0.029218172654509544, 'timestamp': '2025-09-10 02:36:07.448118', 'step': 513, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-09-10 02:36:07.515811', 'step': 513, 'epoch': 1} +{'type': 'loss', 'content': 0.03561558574438095, 'timestamp': '2025-09-10 02:36:07.528394', 'step': 514, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:36:07.581779', 'step': 514, 'epoch': 1} +{'type': 'loss', 'content': 0.013204547576606274, 'timestamp': '2025-09-10 02:36:07.583732', 'step': 515, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:36:07.644002', 'step': 515, 'epoch': 1} +{'type': 'loss', 'content': 0.01040361262857914, 'timestamp': '2025-09-10 02:36:07.655535', 'step': 516, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:36:07.711951', 'step': 516, 'epoch': 1} +{'type': 'loss', 'content': 0.033751796931028366, 'timestamp': '2025-09-10 02:36:07.723201', 'step': 517, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:36:07.776678', 'step': 517, 'epoch': 1} +{'type': 'loss', 'content': 0.011231355369091034, 'timestamp': '2025-09-10 02:36:07.778759', 'step': 518, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:36:07.831714', 'step': 518, 'epoch': 1} +{'type': 'loss', 'content': 0.02144441194832325, 'timestamp': '2025-09-10 02:36:07.839969', 'step': 519, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:36:07.896218', 'step': 519, 'epoch': 1} +{'type': 'loss', 'content': 0.04766199365258217, 'timestamp': '2025-09-10 02:36:07.902017', 'step': 520, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:36:07.954486', 'step': 520, 'epoch': 1} +{'type': 'loss', 'content': 0.012711511924862862, 'timestamp': '2025-09-10 02:36:07.956778', 'step': 521, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:36:08.010041', 'step': 521, 'epoch': 1} +{'type': 'loss', 'content': 0.008068517781794071, 'timestamp': '2025-09-10 02:36:08.011975', 'step': 522, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:36:08.065230', 'step': 522, 'epoch': 1} +{'type': 'loss', 'content': 0.022473914548754692, 'timestamp': '2025-09-10 02:36:08.074775', 'step': 523, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:36:08.128131', 'step': 523, 'epoch': 1} +{'type': 'loss', 'content': 0.010958467610180378, 'timestamp': '2025-09-10 02:36:08.133655', 'step': 524, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:36:08.185366', 'step': 524, 'epoch': 1} +{'type': 'loss', 'content': 0.024017466232180595, 'timestamp': '2025-09-10 02:36:08.187158', 'step': 525, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:36:08.239100', 'step': 525, 'epoch': 1} +{'type': 'loss', 'content': 0.024791982024908066, 'timestamp': '2025-09-10 02:36:08.242352', 'step': 526, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:36:08.294833', 'step': 526, 'epoch': 1} +{'type': 'loss', 'content': 0.016966592520475388, 'timestamp': '2025-09-10 02:36:08.296917', 'step': 527, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:36:08.349888', 'step': 527, 'epoch': 1} +{'type': 'loss', 'content': 0.02015499584376812, 'timestamp': '2025-09-10 02:36:08.357326', 'step': 528, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:36:08.411110', 'step': 528, 'epoch': 1} +{'type': 'loss', 'content': 0.006627053488045931, 'timestamp': '2025-09-10 02:36:08.413499', 'step': 529, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:36:08.465237', 'step': 529, 'epoch': 1} +{'type': 'loss', 'content': 0.02907853201031685, 'timestamp': '2025-09-10 02:36:08.468039', 'step': 530, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:36:08.524650', 'step': 530, 'epoch': 1} +{'type': 'loss', 'content': 0.019893009215593338, 'timestamp': '2025-09-10 02:36:08.530405', 'step': 531, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:36:08.583041', 'step': 531, 'epoch': 1} +{'type': 'loss', 'content': 0.020581111311912537, 'timestamp': '2025-09-10 02:36:08.588893', 'step': 532, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 480], 'flops': 9600058345344.0}, 'timestamp': '2025-09-10 02:36:08.660550', 'step': 532, 'epoch': 1} +{'type': 'loss', 'content': 0.011427761986851692, 'timestamp': '2025-09-10 02:36:08.675465', 'step': 533, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:36:08.727804', 'step': 533, 'epoch': 1} +{'type': 'loss', 'content': 0.03970498591661453, 'timestamp': '2025-09-10 02:36:08.729826', 'step': 534, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:36:08.782080', 'step': 534, 'epoch': 1} +{'type': 'loss', 'content': 0.016118813306093216, 'timestamp': '2025-09-10 02:36:08.784035', 'step': 535, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 688], 'flops': 13760083599040.0}, 'timestamp': '2025-09-10 02:36:08.885129', 'step': 535, 'epoch': 1} +{'type': 'loss', 'content': 0.011583237908780575, 'timestamp': '2025-09-10 02:36:08.905030', 'step': 536, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:36:08.957150', 'step': 536, 'epoch': 1} +{'type': 'loss', 'content': 0.009176967665553093, 'timestamp': '2025-09-10 02:36:08.959175', 'step': 537, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:36:09.011584', 'step': 537, 'epoch': 1} +{'type': 'loss', 'content': 0.02078993245959282, 'timestamp': '2025-09-10 02:36:09.014506', 'step': 538, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:36:09.067594', 'step': 538, 'epoch': 1} +{'type': 'loss', 'content': 0.016172481700778008, 'timestamp': '2025-09-10 02:36:09.069633', 'step': 539, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:36:09.122192', 'step': 539, 'epoch': 1} +{'type': 'loss', 'content': 0.01667824201285839, 'timestamp': '2025-09-10 02:36:09.128051', 'step': 540, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:36:09.181493', 'step': 540, 'epoch': 1} +{'type': 'loss', 'content': 0.013945282436907291, 'timestamp': '2025-09-10 02:36:09.191113', 'step': 541, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:36:09.244341', 'step': 541, 'epoch': 1} +{'type': 'loss', 'content': 0.007478418760001659, 'timestamp': '2025-09-10 02:36:09.246739', 'step': 542, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:36:09.299215', 'step': 542, 'epoch': 1} +{'type': 'loss', 'content': 0.02043714001774788, 'timestamp': '2025-09-10 02:36:09.301507', 'step': 543, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-09-10 02:36:09.367299', 'step': 543, 'epoch': 1} +{'type': 'loss', 'content': 0.0241412166506052, 'timestamp': '2025-09-10 02:36:09.380322', 'step': 544, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:36:09.431888', 'step': 544, 'epoch': 1} +{'type': 'loss', 'content': 0.036015719175338745, 'timestamp': '2025-09-10 02:36:09.434069', 'step': 545, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 560], 'flops': 11200068058304.0}, 'timestamp': '2025-09-10 02:36:09.518501', 'step': 545, 'epoch': 1} +{'type': 'loss', 'content': 0.011378358118236065, 'timestamp': '2025-09-10 02:36:09.533910', 'step': 546, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:36:09.587319', 'step': 546, 'epoch': 1} +{'type': 'loss', 'content': 0.011544203385710716, 'timestamp': '2025-09-10 02:36:09.589563', 'step': 547, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:36:09.642433', 'step': 547, 'epoch': 1} +{'type': 'loss', 'content': 0.007249454967677593, 'timestamp': '2025-09-10 02:36:09.649910', 'step': 548, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:36:09.702271', 'step': 548, 'epoch': 1} +{'type': 'loss', 'content': 0.031287916004657745, 'timestamp': '2025-09-10 02:36:09.704461', 'step': 549, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:36:09.757222', 'step': 549, 'epoch': 1} +{'type': 'loss', 'content': 0.006979082711040974, 'timestamp': '2025-09-10 02:36:09.759554', 'step': 550, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:36:09.813347', 'step': 550, 'epoch': 1} +{'type': 'loss', 'content': 0.018987594172358513, 'timestamp': '2025-09-10 02:36:09.823181', 'step': 551, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:36:09.877032', 'step': 551, 'epoch': 1} +{'type': 'loss', 'content': 0.01209915429353714, 'timestamp': '2025-09-10 02:36:09.887450', 'step': 552, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:36:09.939490', 'step': 552, 'epoch': 1} +{'type': 'loss', 'content': 0.005028535611927509, 'timestamp': '2025-09-10 02:36:09.941459', 'step': 553, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:36:09.994551', 'step': 553, 'epoch': 1} +{'type': 'loss', 'content': 0.015660250559449196, 'timestamp': '2025-09-10 02:36:10.004155', 'step': 554, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:36:10.056382', 'step': 554, 'epoch': 1} +{'type': 'loss', 'content': 0.009114133194088936, 'timestamp': '2025-09-10 02:36:10.058550', 'step': 555, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:36:10.111047', 'step': 555, 'epoch': 1} +{'type': 'loss', 'content': 0.00943037960678339, 'timestamp': '2025-09-10 02:36:10.116916', 'step': 556, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:36:10.168778', 'step': 556, 'epoch': 1} +{'type': 'loss', 'content': 0.02232159674167633, 'timestamp': '2025-09-10 02:36:10.170956', 'step': 557, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-09-10 02:36:10.232156', 'step': 557, 'epoch': 1} +{'type': 'loss', 'content': 0.010626793839037418, 'timestamp': '2025-09-10 02:36:10.243225', 'step': 558, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:36:10.295664', 'step': 558, 'epoch': 1} +{'type': 'loss', 'content': 0.015199241228401661, 'timestamp': '2025-09-10 02:36:10.297864', 'step': 559, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:36:10.350348', 'step': 559, 'epoch': 1} +{'type': 'loss', 'content': 0.011815892532467842, 'timestamp': '2025-09-10 02:36:10.356200', 'step': 560, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:36:10.407943', 'step': 560, 'epoch': 1} +{'type': 'loss', 'content': 0.024737147614359856, 'timestamp': '2025-09-10 02:36:10.409877', 'step': 561, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:36:10.462886', 'step': 561, 'epoch': 1} +{'type': 'loss', 'content': 0.006226534489542246, 'timestamp': '2025-09-10 02:36:10.472481', 'step': 562, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-09-10 02:36:10.533053', 'step': 562, 'epoch': 1} +{'type': 'loss', 'content': 0.003087083576247096, 'timestamp': '2025-09-10 02:36:10.543985', 'step': 563, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:36:10.596768', 'step': 563, 'epoch': 1} +{'type': 'loss', 'content': 0.024480143561959267, 'timestamp': '2025-09-10 02:36:10.602552', 'step': 564, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:36:10.655483', 'step': 564, 'epoch': 1} +{'type': 'loss', 'content': 0.008550086989998817, 'timestamp': '2025-09-10 02:36:10.657533', 'step': 565, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:36:10.710065', 'step': 565, 'epoch': 1} +{'type': 'loss', 'content': 0.007321577053517103, 'timestamp': '2025-09-10 02:36:10.718065', 'step': 566, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:36:10.770682', 'step': 566, 'epoch': 1} +{'type': 'loss', 'content': 0.005716038402169943, 'timestamp': '2025-09-10 02:36:10.772740', 'step': 567, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:36:10.825461', 'step': 567, 'epoch': 1} +{'type': 'loss', 'content': 0.009402218274772167, 'timestamp': '2025-09-10 02:36:10.831256', 'step': 568, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:36:10.883109', 'step': 568, 'epoch': 1} +{'type': 'loss', 'content': 0.023248685523867607, 'timestamp': '2025-09-10 02:36:10.885066', 'step': 569, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:36:10.937806', 'step': 569, 'epoch': 1} +{'type': 'loss', 'content': 0.0069808135740458965, 'timestamp': '2025-09-10 02:36:10.944057', 'step': 570, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:36:10.997112', 'step': 570, 'epoch': 1} +{'type': 'loss', 'content': 0.022818030789494514, 'timestamp': '2025-09-10 02:36:10.999517', 'step': 571, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:36:11.052966', 'step': 571, 'epoch': 1} +{'type': 'loss', 'content': 0.014487753622233868, 'timestamp': '2025-09-10 02:36:11.058594', 'step': 572, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 464], 'flops': 9280056402752.0}, 'timestamp': '2025-09-10 02:36:11.133095', 'step': 572, 'epoch': 1} +{'type': 'loss', 'content': 0.018413782119750977, 'timestamp': '2025-09-10 02:36:11.147668', 'step': 573, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:36:11.202679', 'step': 573, 'epoch': 1} +{'type': 'loss', 'content': 0.01510600745677948, 'timestamp': '2025-09-10 02:36:11.212444', 'step': 574, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:36:11.265578', 'step': 574, 'epoch': 1} +{'type': 'loss', 'content': 0.010426660999655724, 'timestamp': '2025-09-10 02:36:11.267657', 'step': 575, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:36:11.321024', 'step': 575, 'epoch': 1} +{'type': 'loss', 'content': 0.019595544785261154, 'timestamp': '2025-09-10 02:36:11.326608', 'step': 576, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:36:11.378933', 'step': 576, 'epoch': 1} +{'type': 'loss', 'content': 0.017996158450841904, 'timestamp': '2025-09-10 02:36:11.386774', 'step': 577, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:36:11.444891', 'step': 577, 'epoch': 1} +{'type': 'loss', 'content': 0.046230610460042953, 'timestamp': '2025-09-10 02:36:11.455309', 'step': 578, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:36:11.508247', 'step': 578, 'epoch': 1} +{'type': 'loss', 'content': 0.010992661118507385, 'timestamp': '2025-09-10 02:36:11.510158', 'step': 579, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:36:11.562935', 'step': 579, 'epoch': 1} +{'type': 'loss', 'content': 0.0033359068911522627, 'timestamp': '2025-09-10 02:36:11.570313', 'step': 580, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:36:11.622488', 'step': 580, 'epoch': 1} +{'type': 'loss', 'content': 0.014831820502877235, 'timestamp': '2025-09-10 02:36:11.625324', 'step': 581, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:36:11.677671', 'step': 581, 'epoch': 1} +{'type': 'loss', 'content': 0.04670983552932739, 'timestamp': '2025-09-10 02:36:11.679904', 'step': 582, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:36:11.732581', 'step': 582, 'epoch': 1} +{'type': 'loss', 'content': 0.027669671922922134, 'timestamp': '2025-09-10 02:36:11.734695', 'step': 583, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:36:11.788004', 'step': 583, 'epoch': 1} +{'type': 'loss', 'content': 0.021408479660749435, 'timestamp': '2025-09-10 02:36:11.798623', 'step': 584, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:36:11.850912', 'step': 584, 'epoch': 1} +{'type': 'loss', 'content': 0.0021256650798022747, 'timestamp': '2025-09-10 02:36:11.853135', 'step': 585, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:36:11.905403', 'step': 585, 'epoch': 1} +{'type': 'loss', 'content': 0.01679086498916149, 'timestamp': '2025-09-10 02:36:11.913766', 'step': 586, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:36:11.966122', 'step': 586, 'epoch': 1} +{'type': 'loss', 'content': 0.02040312997996807, 'timestamp': '2025-09-10 02:36:11.968181', 'step': 587, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:36:12.020430', 'step': 587, 'epoch': 1} +{'type': 'loss', 'content': 0.004376889206469059, 'timestamp': '2025-09-10 02:36:12.026033', 'step': 588, 'epoch': 1} +{'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 736], 'batch_size': 8, 'flops': 14691612894976}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10859018244352}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 624], 'batch_size': 8, 'flops': 12455932682112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11497784019456}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8623338031488}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8623338031488}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10859018244352}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 656], 'batch_size': 8, 'flops': 13094698457216}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 560], 'batch_size': 8, 'flops': 11178401131904}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11497784019456}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 1168], 'batch_size': 8, 'flops': 23314950858880}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 640], 'batch_size': 8, 'flops': 12775315569664}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [6, 208], 'batch_size': 8, 'flops': 4151977605760}], 'timestamp': '2025-09-10 02:36:28.785153', 'step': 588, 'epoch': 1} +{'type': 'pplx', 'content': 23128982.936499126, 'timestamp': '2025-09-10 02:36:28.787845', 'step': 588, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 816], 'flops': 16320099139776.0}, 'timestamp': '2025-09-10 02:36:28.906038', 'step': 588, 'epoch': 1} +{'type': 'loss', 'content': 0.007238972466439009, 'timestamp': '2025-09-10 02:36:28.931302', 'step': 589, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:36:28.986499', 'step': 589, 'epoch': 1} +{'type': 'loss', 'content': 0.016987601295113564, 'timestamp': '2025-09-10 02:36:28.988792', 'step': 590, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:36:29.042108', 'step': 590, 'epoch': 1} +{'type': 'loss', 'content': 0.015001409687101841, 'timestamp': '2025-09-10 02:36:29.047805', 'step': 591, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:36:29.102058', 'step': 591, 'epoch': 1} +{'type': 'loss', 'content': 0.009109629318118095, 'timestamp': '2025-09-10 02:36:29.112440', 'step': 592, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:36:29.165068', 'step': 592, 'epoch': 1} +{'type': 'loss', 'content': 0.004366945009678602, 'timestamp': '2025-09-10 02:36:29.167066', 'step': 593, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:36:29.219664', 'step': 593, 'epoch': 1} +{'type': 'loss', 'content': 0.007493204902857542, 'timestamp': '2025-09-10 02:36:29.222522', 'step': 594, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:36:29.275397', 'step': 594, 'epoch': 1} +{'type': 'loss', 'content': 0.01290935929864645, 'timestamp': '2025-09-10 02:36:29.277549', 'step': 595, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-09-10 02:36:29.339415', 'step': 595, 'epoch': 1} +{'type': 'loss', 'content': 0.011629750020802021, 'timestamp': '2025-09-10 02:36:29.351296', 'step': 596, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:36:29.404816', 'step': 596, 'epoch': 1} +{'type': 'loss', 'content': 0.009904067032039165, 'timestamp': '2025-09-10 02:36:29.407001', 'step': 597, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:36:29.460019', 'step': 597, 'epoch': 1} +{'type': 'loss', 'content': 0.0404568575322628, 'timestamp': '2025-09-10 02:36:29.462404', 'step': 598, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:36:29.515106', 'step': 598, 'epoch': 1} +{'type': 'loss', 'content': 0.012372517958283424, 'timestamp': '2025-09-10 02:36:29.517860', 'step': 599, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:36:29.571671', 'step': 599, 'epoch': 1} +{'type': 'loss', 'content': 0.0342080220580101, 'timestamp': '2025-09-10 02:36:29.578062', 'step': 600, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:36:29.630565', 'step': 600, 'epoch': 1} +{'type': 'loss', 'content': 0.0018423749133944511, 'timestamp': '2025-09-10 02:36:29.632374', 'step': 601, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:36:29.685412', 'step': 601, 'epoch': 1} +{'type': 'loss', 'content': 0.020080851390957832, 'timestamp': '2025-09-10 02:36:29.687488', 'step': 602, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:36:29.740324', 'step': 602, 'epoch': 1} +{'type': 'loss', 'content': 0.014774338342249393, 'timestamp': '2025-09-10 02:36:29.749925', 'step': 603, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:36:29.804789', 'step': 603, 'epoch': 1} +{'type': 'loss', 'content': 0.05874253436923027, 'timestamp': '2025-09-10 02:36:29.815332', 'step': 604, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-09-10 02:36:29.875246', 'step': 604, 'epoch': 1} +{'type': 'loss', 'content': 0.0313432440161705, 'timestamp': '2025-09-10 02:36:29.887074', 'step': 605, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:36:29.940181', 'step': 605, 'epoch': 1} +{'type': 'loss', 'content': 0.00326648005284369, 'timestamp': '2025-09-10 02:36:29.942425', 'step': 606, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:36:29.995357', 'step': 606, 'epoch': 1} +{'type': 'loss', 'content': 0.009223191998898983, 'timestamp': '2025-09-10 02:36:30.003407', 'step': 607, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:36:30.056785', 'step': 607, 'epoch': 1} +{'type': 'loss', 'content': 0.0024524382315576077, 'timestamp': '2025-09-10 02:36:30.063048', 'step': 608, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:36:30.115230', 'step': 608, 'epoch': 1} +{'type': 'loss', 'content': 0.004639843013137579, 'timestamp': '2025-09-10 02:36:30.117350', 'step': 609, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:36:30.170184', 'step': 609, 'epoch': 1} +{'type': 'loss', 'content': 0.00964688416570425, 'timestamp': '2025-09-10 02:36:30.172336', 'step': 610, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:36:30.224931', 'step': 610, 'epoch': 1} +{'type': 'loss', 'content': 0.004201619885861874, 'timestamp': '2025-09-10 02:36:30.227238', 'step': 611, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:36:30.280871', 'step': 611, 'epoch': 1} +{'type': 'loss', 'content': 0.028701484203338623, 'timestamp': '2025-09-10 02:36:30.287819', 'step': 612, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:36:30.344675', 'step': 612, 'epoch': 1} +{'type': 'loss', 'content': 0.010117202997207642, 'timestamp': '2025-09-10 02:36:30.355887', 'step': 613, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:36:30.410187', 'step': 613, 'epoch': 1} +{'type': 'loss', 'content': 0.00475263362750411, 'timestamp': '2025-09-10 02:36:30.418951', 'step': 614, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:36:30.472571', 'step': 614, 'epoch': 1} +{'type': 'loss', 'content': 0.022213634103536606, 'timestamp': '2025-09-10 02:36:30.474783', 'step': 615, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:36:30.534626', 'step': 615, 'epoch': 1} +{'type': 'loss', 'content': 0.006725195329636335, 'timestamp': '2025-09-10 02:36:30.546163', 'step': 616, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:36:30.598424', 'step': 616, 'epoch': 1} +{'type': 'loss', 'content': 0.008890500292181969, 'timestamp': '2025-09-10 02:36:30.600352', 'step': 617, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:36:30.652887', 'step': 617, 'epoch': 1} +{'type': 'loss', 'content': 0.002376921707764268, 'timestamp': '2025-09-10 02:36:30.655119', 'step': 618, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:36:30.707669', 'step': 618, 'epoch': 1} +{'type': 'loss', 'content': 0.0235095527023077, 'timestamp': '2025-09-10 02:36:30.710541', 'step': 619, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:36:30.763039', 'step': 619, 'epoch': 1} +{'type': 'loss', 'content': 0.01089462824165821, 'timestamp': '2025-09-10 02:36:30.770349', 'step': 620, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:36:30.822781', 'step': 620, 'epoch': 1} +{'type': 'loss', 'content': 0.015000701881945133, 'timestamp': '2025-09-10 02:36:30.830859', 'step': 621, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:36:30.883674', 'step': 621, 'epoch': 1} +{'type': 'loss', 'content': 0.001055436092428863, 'timestamp': '2025-09-10 02:36:30.885781', 'step': 622, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:36:30.937819', 'step': 622, 'epoch': 1} +{'type': 'loss', 'content': 0.0067395372316241264, 'timestamp': '2025-09-10 02:36:30.940756', 'step': 623, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:36:31.000356', 'step': 623, 'epoch': 1} +{'type': 'loss', 'content': 0.005097785033285618, 'timestamp': '2025-09-10 02:36:31.006103', 'step': 624, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:36:31.058373', 'step': 624, 'epoch': 1} +{'type': 'loss', 'content': 0.02528545819222927, 'timestamp': '2025-09-10 02:36:31.060384', 'step': 625, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:36:31.112756', 'step': 625, 'epoch': 1} +{'type': 'loss', 'content': 0.008685345761477947, 'timestamp': '2025-09-10 02:36:31.114953', 'step': 626, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:36:31.168616', 'step': 626, 'epoch': 1} +{'type': 'loss', 'content': 0.004012659192085266, 'timestamp': '2025-09-10 02:36:31.171007', 'step': 627, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:36:31.223491', 'step': 627, 'epoch': 1} +{'type': 'loss', 'content': 0.01806785725057125, 'timestamp': '2025-09-10 02:36:31.230736', 'step': 628, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:36:31.283277', 'step': 628, 'epoch': 1} +{'type': 'loss', 'content': 0.03684353828430176, 'timestamp': '2025-09-10 02:36:31.285235', 'step': 629, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:36:31.338120', 'step': 629, 'epoch': 1} +{'type': 'loss', 'content': 0.03313937410712242, 'timestamp': '2025-09-10 02:36:31.347733', 'step': 630, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:36:31.400127', 'step': 630, 'epoch': 1} +{'type': 'loss', 'content': 0.006307187490165234, 'timestamp': '2025-09-10 02:36:31.402508', 'step': 631, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:36:31.455215', 'step': 631, 'epoch': 1} +{'type': 'loss', 'content': 0.0016396130667999387, 'timestamp': '2025-09-10 02:36:31.461031', 'step': 632, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:36:31.516894', 'step': 632, 'epoch': 1} +{'type': 'loss', 'content': 0.017854390665888786, 'timestamp': '2025-09-10 02:36:31.518888', 'step': 633, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:36:31.571519', 'step': 633, 'epoch': 1} +{'type': 'loss', 'content': 0.05947822704911232, 'timestamp': '2025-09-10 02:36:31.573435', 'step': 634, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:36:31.627211', 'step': 634, 'epoch': 1} +{'type': 'loss', 'content': 0.06303369253873825, 'timestamp': '2025-09-10 02:36:31.629467', 'step': 635, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:36:31.682807', 'step': 635, 'epoch': 1} +{'type': 'loss', 'content': 0.010691377334296703, 'timestamp': '2025-09-10 02:36:31.693178', 'step': 636, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:36:31.745800', 'step': 636, 'epoch': 1} +{'type': 'loss', 'content': 0.06338184326887131, 'timestamp': '2025-09-10 02:36:31.747771', 'step': 637, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:36:31.800004', 'step': 637, 'epoch': 1} +{'type': 'loss', 'content': 0.03403312340378761, 'timestamp': '2025-09-10 02:36:31.802144', 'step': 638, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:36:31.855178', 'step': 638, 'epoch': 1} +{'type': 'loss', 'content': 0.009687677025794983, 'timestamp': '2025-09-10 02:36:31.857290', 'step': 639, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:36:31.911472', 'step': 639, 'epoch': 1} +{'type': 'loss', 'content': 0.0017506403382867575, 'timestamp': '2025-09-10 02:36:31.922013', 'step': 640, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:36:31.974950', 'step': 640, 'epoch': 1} +{'type': 'loss', 'content': 0.02189011499285698, 'timestamp': '2025-09-10 02:36:31.985455', 'step': 641, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:36:32.038278', 'step': 641, 'epoch': 1} +{'type': 'loss', 'content': 0.02935587801039219, 'timestamp': '2025-09-10 02:36:32.042004', 'step': 642, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:36:32.097238', 'step': 642, 'epoch': 1} +{'type': 'loss', 'content': 0.026329005137085915, 'timestamp': '2025-09-10 02:36:32.099303', 'step': 643, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:36:32.152285', 'step': 643, 'epoch': 1} +{'type': 'loss', 'content': 0.03041278012096882, 'timestamp': '2025-09-10 02:36:32.162696', 'step': 644, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:36:32.214655', 'step': 644, 'epoch': 1} +{'type': 'loss', 'content': 0.00822833739221096, 'timestamp': '2025-09-10 02:36:32.216880', 'step': 645, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:36:32.269165', 'step': 645, 'epoch': 1} +{'type': 'loss', 'content': 0.013744184747338295, 'timestamp': '2025-09-10 02:36:32.279979', 'step': 646, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:36:32.334900', 'step': 646, 'epoch': 1} +{'type': 'loss', 'content': 0.019500361755490303, 'timestamp': '2025-09-10 02:36:32.337562', 'step': 647, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:36:32.390147', 'step': 647, 'epoch': 1} +{'type': 'loss', 'content': 0.0037625690456479788, 'timestamp': '2025-09-10 02:36:32.396092', 'step': 648, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:36:32.448397', 'step': 648, 'epoch': 1} +{'type': 'loss', 'content': 0.011067330837249756, 'timestamp': '2025-09-10 02:36:32.450449', 'step': 649, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-09-10 02:36:32.516392', 'step': 649, 'epoch': 1} +{'type': 'loss', 'content': 0.022867491468787193, 'timestamp': '2025-09-10 02:36:32.528555', 'step': 650, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:36:32.582179', 'step': 650, 'epoch': 1} +{'type': 'loss', 'content': 0.003159865038469434, 'timestamp': '2025-09-10 02:36:32.584348', 'step': 651, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:36:32.636630', 'step': 651, 'epoch': 1} +{'type': 'loss', 'content': 0.009259316138923168, 'timestamp': '2025-09-10 02:36:32.642416', 'step': 652, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-09-10 02:36:32.701591', 'step': 652, 'epoch': 1} +{'type': 'loss', 'content': 0.020939616486430168, 'timestamp': '2025-09-10 02:36:32.713354', 'step': 653, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:36:32.766577', 'step': 653, 'epoch': 1} +{'type': 'loss', 'content': 0.033403415232896805, 'timestamp': '2025-09-10 02:36:32.768710', 'step': 654, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:36:32.821144', 'step': 654, 'epoch': 1} +{'type': 'loss', 'content': 0.013647467829287052, 'timestamp': '2025-09-10 02:36:32.827577', 'step': 655, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:36:32.881267', 'step': 655, 'epoch': 1} +{'type': 'loss', 'content': 0.006164844613522291, 'timestamp': '2025-09-10 02:36:32.891662', 'step': 656, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:36:32.944341', 'step': 656, 'epoch': 1} +{'type': 'loss', 'content': 0.01957377977669239, 'timestamp': '2025-09-10 02:36:32.954831', 'step': 657, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:36:33.007542', 'step': 657, 'epoch': 1} +{'type': 'loss', 'content': 0.04672471061348915, 'timestamp': '2025-09-10 02:36:33.009473', 'step': 658, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:36:33.061937', 'step': 658, 'epoch': 1} +{'type': 'loss', 'content': 0.016188517212867737, 'timestamp': '2025-09-10 02:36:33.064534', 'step': 659, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:36:33.116889', 'step': 659, 'epoch': 1} +{'type': 'loss', 'content': 0.04000651836395264, 'timestamp': '2025-09-10 02:36:33.122853', 'step': 660, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:36:33.175080', 'step': 660, 'epoch': 1} +{'type': 'loss', 'content': 0.01387543324381113, 'timestamp': '2025-09-10 02:36:33.177065', 'step': 661, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:36:33.229910', 'step': 661, 'epoch': 1} +{'type': 'loss', 'content': 0.01652139239013195, 'timestamp': '2025-09-10 02:36:33.239781', 'step': 662, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:36:33.292797', 'step': 662, 'epoch': 1} +{'type': 'loss', 'content': 0.021793778985738754, 'timestamp': '2025-09-10 02:36:33.306016', 'step': 663, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:36:33.365074', 'step': 663, 'epoch': 1} +{'type': 'loss', 'content': 0.016380032524466515, 'timestamp': '2025-09-10 02:36:33.375489', 'step': 664, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:36:33.435226', 'step': 664, 'epoch': 1} +{'type': 'loss', 'content': 0.009158218279480934, 'timestamp': '2025-09-10 02:36:33.438264', 'step': 665, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:36:33.496788', 'step': 665, 'epoch': 1} +{'type': 'loss', 'content': 0.029515789821743965, 'timestamp': '2025-09-10 02:36:33.499204', 'step': 666, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:36:33.558996', 'step': 666, 'epoch': 1} +{'type': 'loss', 'content': 0.02951805293560028, 'timestamp': '2025-09-10 02:36:33.566622', 'step': 667, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:36:33.628662', 'step': 667, 'epoch': 1} +{'type': 'loss', 'content': 0.00808575190603733, 'timestamp': '2025-09-10 02:36:33.635127', 'step': 668, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:36:33.689360', 'step': 668, 'epoch': 1} +{'type': 'loss', 'content': 0.017804021015763283, 'timestamp': '2025-09-10 02:36:33.691517', 'step': 669, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:36:33.746033', 'step': 669, 'epoch': 1} +{'type': 'loss', 'content': 0.035620737820863724, 'timestamp': '2025-09-10 02:36:33.751120', 'step': 670, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:36:33.805083', 'step': 670, 'epoch': 1} +{'type': 'loss', 'content': 0.009280617348849773, 'timestamp': '2025-09-10 02:36:33.809737', 'step': 671, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:36:33.871574', 'step': 671, 'epoch': 1} +{'type': 'loss', 'content': 0.009552262723445892, 'timestamp': '2025-09-10 02:36:33.882756', 'step': 672, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:36:33.943019', 'step': 672, 'epoch': 1} +{'type': 'loss', 'content': 0.017052967101335526, 'timestamp': '2025-09-10 02:36:33.945794', 'step': 673, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-09-10 02:36:34.019672', 'step': 673, 'epoch': 1} +{'type': 'loss', 'content': 0.034075286239385605, 'timestamp': '2025-09-10 02:36:34.032318', 'step': 674, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:36:34.102378', 'step': 674, 'epoch': 1} +{'type': 'loss', 'content': 0.019345467910170555, 'timestamp': '2025-09-10 02:36:34.111256', 'step': 675, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:36:34.166556', 'step': 675, 'epoch': 1} +{'type': 'loss', 'content': 0.015054549090564251, 'timestamp': '2025-09-10 02:36:34.173281', 'step': 676, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:36:34.243342', 'step': 676, 'epoch': 1} +{'type': 'loss', 'content': 0.009510613977909088, 'timestamp': '2025-09-10 02:36:34.246974', 'step': 677, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:36:34.301564', 'step': 677, 'epoch': 1} +{'type': 'loss', 'content': 0.027910476550459862, 'timestamp': '2025-09-10 02:36:34.305992', 'step': 678, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:36:34.362296', 'step': 678, 'epoch': 1} +{'type': 'loss', 'content': 0.00868389755487442, 'timestamp': '2025-09-10 02:36:34.364412', 'step': 679, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:36:34.438507', 'step': 679, 'epoch': 1} +{'type': 'loss', 'content': 0.01145507674664259, 'timestamp': '2025-09-10 02:36:34.453410', 'step': 680, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:36:34.521391', 'step': 680, 'epoch': 1} +{'type': 'loss', 'content': 0.01435221266001463, 'timestamp': '2025-09-10 02:36:34.531793', 'step': 681, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:36:34.585549', 'step': 681, 'epoch': 1} +{'type': 'loss', 'content': 0.008981848135590553, 'timestamp': '2025-09-10 02:36:34.591528', 'step': 682, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:36:34.659149', 'step': 682, 'epoch': 1} +{'type': 'loss', 'content': 0.02495545521378517, 'timestamp': '2025-09-10 02:36:34.661918', 'step': 683, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:36:34.715720', 'step': 683, 'epoch': 1} +{'type': 'loss', 'content': 0.02103118598461151, 'timestamp': '2025-09-10 02:36:34.722918', 'step': 684, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-09-10 02:36:34.789236', 'step': 684, 'epoch': 1} +{'type': 'loss', 'content': 0.022110581398010254, 'timestamp': '2025-09-10 02:36:34.802428', 'step': 685, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:36:34.870682', 'step': 685, 'epoch': 1} +{'type': 'loss', 'content': 0.026706701144576073, 'timestamp': '2025-09-10 02:36:34.873551', 'step': 686, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:36:34.927608', 'step': 686, 'epoch': 1} +{'type': 'loss', 'content': 0.019403820857405663, 'timestamp': '2025-09-10 02:36:34.929951', 'step': 687, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:36:34.987799', 'step': 687, 'epoch': 1} +{'type': 'loss', 'content': 0.015205049887299538, 'timestamp': '2025-09-10 02:36:35.003554', 'step': 688, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:36:35.064158', 'step': 688, 'epoch': 1} +{'type': 'loss', 'content': 0.025768402963876724, 'timestamp': '2025-09-10 02:36:35.073503', 'step': 689, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:36:35.130980', 'step': 689, 'epoch': 1} +{'type': 'loss', 'content': 0.014641908928751945, 'timestamp': '2025-09-10 02:36:35.140619', 'step': 690, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:36:35.237956', 'step': 690, 'epoch': 1} +{'type': 'loss', 'content': 0.015722578391432762, 'timestamp': '2025-09-10 02:36:35.243068', 'step': 691, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:36:35.304138', 'step': 691, 'epoch': 1} +{'type': 'loss', 'content': 0.012852429412305355, 'timestamp': '2025-09-10 02:36:35.310116', 'step': 692, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:36:35.363036', 'step': 692, 'epoch': 1} +{'type': 'loss', 'content': 0.0215513464063406, 'timestamp': '2025-09-10 02:36:35.364999', 'step': 693, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:36:35.417127', 'step': 693, 'epoch': 1} +{'type': 'loss', 'content': 0.009072760120034218, 'timestamp': '2025-09-10 02:36:35.420032', 'step': 694, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:36:35.472778', 'step': 694, 'epoch': 1} +{'type': 'loss', 'content': 0.01474462728947401, 'timestamp': '2025-09-10 02:36:35.475181', 'step': 695, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-09-10 02:36:35.544437', 'step': 695, 'epoch': 1} +{'type': 'loss', 'content': 0.02467481978237629, 'timestamp': '2025-09-10 02:36:35.557932', 'step': 696, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:36:35.610313', 'step': 696, 'epoch': 1} +{'type': 'loss', 'content': 0.007700375281274319, 'timestamp': '2025-09-10 02:36:35.618466', 'step': 697, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:36:35.672719', 'step': 697, 'epoch': 1} +{'type': 'loss', 'content': 0.010380787774920464, 'timestamp': '2025-09-10 02:36:35.674748', 'step': 698, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:36:35.729032', 'step': 698, 'epoch': 1} +{'type': 'loss', 'content': 0.02106519415974617, 'timestamp': '2025-09-10 02:36:35.731103', 'step': 699, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:36:35.785861', 'step': 699, 'epoch': 1} +{'type': 'loss', 'content': 0.02622860297560692, 'timestamp': '2025-09-10 02:36:35.792038', 'step': 700, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:36:35.845270', 'step': 700, 'epoch': 1} +{'type': 'loss', 'content': 0.013282016851007938, 'timestamp': '2025-09-10 02:36:35.855743', 'step': 701, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:36:35.909897', 'step': 701, 'epoch': 1} +{'type': 'loss', 'content': 0.013056610710918903, 'timestamp': '2025-09-10 02:36:35.911874', 'step': 702, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:36:35.964540', 'step': 702, 'epoch': 1} +{'type': 'loss', 'content': 0.02552831545472145, 'timestamp': '2025-09-10 02:36:35.972621', 'step': 703, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:36:36.025437', 'step': 703, 'epoch': 1} +{'type': 'loss', 'content': 0.02205856330692768, 'timestamp': '2025-09-10 02:36:36.031114', 'step': 704, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:36:36.084678', 'step': 704, 'epoch': 1} +{'type': 'loss', 'content': 0.00926896370947361, 'timestamp': '2025-09-10 02:36:36.095112', 'step': 705, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:36:36.147786', 'step': 705, 'epoch': 1} +{'type': 'loss', 'content': 0.00949943345040083, 'timestamp': '2025-09-10 02:36:36.150059', 'step': 706, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:36:36.203676', 'step': 706, 'epoch': 1} +{'type': 'loss', 'content': 0.014659402891993523, 'timestamp': '2025-09-10 02:36:36.212800', 'step': 707, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:36:36.270135', 'step': 707, 'epoch': 1} +{'type': 'loss', 'content': 0.016407150775194168, 'timestamp': '2025-09-10 02:36:36.281343', 'step': 708, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:36:36.333802', 'step': 708, 'epoch': 1} +{'type': 'loss', 'content': 0.014719000086188316, 'timestamp': '2025-09-10 02:36:36.335765', 'step': 709, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:36:36.389778', 'step': 709, 'epoch': 1} +{'type': 'loss', 'content': 0.018781915307044983, 'timestamp': '2025-09-10 02:36:36.391976', 'step': 710, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:36:36.449608', 'step': 710, 'epoch': 1} +{'type': 'loss', 'content': 0.01804245077073574, 'timestamp': '2025-09-10 02:36:36.460017', 'step': 711, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:36:36.513781', 'step': 711, 'epoch': 1} +{'type': 'loss', 'content': 0.015068510547280312, 'timestamp': '2025-09-10 02:36:36.519883', 'step': 712, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:36:36.573010', 'step': 712, 'epoch': 1} +{'type': 'loss', 'content': 0.017627691850066185, 'timestamp': '2025-09-10 02:36:36.578574', 'step': 713, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:36:36.632003', 'step': 713, 'epoch': 1} +{'type': 'loss', 'content': 0.016136599704623222, 'timestamp': '2025-09-10 02:36:36.634254', 'step': 714, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:36:36.687693', 'step': 714, 'epoch': 1} +{'type': 'loss', 'content': 0.009769621305167675, 'timestamp': '2025-09-10 02:36:36.689891', 'step': 715, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:36:36.742774', 'step': 715, 'epoch': 1} +{'type': 'loss', 'content': 0.01921756938099861, 'timestamp': '2025-09-10 02:36:36.749819', 'step': 716, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:36:36.808788', 'step': 716, 'epoch': 1} +{'type': 'loss', 'content': 0.006998592056334019, 'timestamp': '2025-09-10 02:36:36.820335', 'step': 717, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:36:36.872717', 'step': 717, 'epoch': 1} +{'type': 'loss', 'content': 0.01364762894809246, 'timestamp': '2025-09-10 02:36:36.880834', 'step': 718, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-09-10 02:36:36.947103', 'step': 718, 'epoch': 1} +{'type': 'loss', 'content': 0.0220673568546772, 'timestamp': '2025-09-10 02:36:36.959332', 'step': 719, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:36:37.012469', 'step': 719, 'epoch': 1} +{'type': 'loss', 'content': 0.011288968846201897, 'timestamp': '2025-09-10 02:36:37.021549', 'step': 720, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:36:37.073860', 'step': 720, 'epoch': 1} +{'type': 'loss', 'content': 0.005582908634096384, 'timestamp': '2025-09-10 02:36:37.076040', 'step': 721, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:36:37.128587', 'step': 721, 'epoch': 1} +{'type': 'loss', 'content': 0.029105016961693764, 'timestamp': '2025-09-10 02:36:37.136904', 'step': 722, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:36:37.189685', 'step': 722, 'epoch': 1} +{'type': 'loss', 'content': 0.015418858267366886, 'timestamp': '2025-09-10 02:36:37.191939', 'step': 723, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:36:37.244895', 'step': 723, 'epoch': 1} +{'type': 'loss', 'content': 0.018920283764600754, 'timestamp': '2025-09-10 02:36:37.250667', 'step': 724, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:36:37.303815', 'step': 724, 'epoch': 1} +{'type': 'loss', 'content': 0.029933378100395203, 'timestamp': '2025-09-10 02:36:37.305856', 'step': 725, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:36:37.358653', 'step': 725, 'epoch': 1} +{'type': 'loss', 'content': 0.010641084052622318, 'timestamp': '2025-09-10 02:36:37.360840', 'step': 726, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:36:37.413621', 'step': 726, 'epoch': 1} +{'type': 'loss', 'content': 0.019988510757684708, 'timestamp': '2025-09-10 02:36:37.415458', 'step': 727, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:36:37.468347', 'step': 727, 'epoch': 1} +{'type': 'loss', 'content': 0.02792045660316944, 'timestamp': '2025-09-10 02:36:37.474448', 'step': 728, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-09-10 02:36:37.540184', 'step': 728, 'epoch': 1} +{'type': 'loss', 'content': 0.0059188418090343475, 'timestamp': '2025-09-10 02:36:37.553811', 'step': 729, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:36:37.607463', 'step': 729, 'epoch': 1} +{'type': 'loss', 'content': 0.013464928604662418, 'timestamp': '2025-09-10 02:36:37.609820', 'step': 730, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:36:37.663794', 'step': 730, 'epoch': 1} +{'type': 'loss', 'content': 0.004084447864443064, 'timestamp': '2025-09-10 02:36:37.673635', 'step': 731, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:36:37.726595', 'step': 731, 'epoch': 1} +{'type': 'loss', 'content': 0.01191288884729147, 'timestamp': '2025-09-10 02:36:37.732270', 'step': 732, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:36:37.784313', 'step': 732, 'epoch': 1} +{'type': 'loss', 'content': 0.0049387919716537, 'timestamp': '2025-09-10 02:36:37.792278', 'step': 733, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:36:37.844978', 'step': 733, 'epoch': 1} +{'type': 'loss', 'content': 0.011774830520153046, 'timestamp': '2025-09-10 02:36:37.846891', 'step': 734, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:36:37.899891', 'step': 734, 'epoch': 1} +{'type': 'loss', 'content': 0.024851668626070023, 'timestamp': '2025-09-10 02:36:37.901835', 'step': 735, 'epoch': 1} +{'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 736], 'batch_size': 8, 'flops': 14691612894976}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10859018244352}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 624], 'batch_size': 8, 'flops': 12455932682112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11497784019456}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8623338031488}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8623338031488}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10859018244352}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 656], 'batch_size': 8, 'flops': 13094698457216}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 560], 'batch_size': 8, 'flops': 11178401131904}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11497784019456}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 1168], 'batch_size': 8, 'flops': 23314950858880}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 640], 'batch_size': 8, 'flops': 12775315569664}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [6, 208], 'batch_size': 8, 'flops': 4151977605760}], 'timestamp': '2025-09-10 02:36:54.667663', 'step': 735, 'epoch': 1} +{'type': 'pplx', 'content': 21917958.077079695, 'timestamp': '2025-09-10 02:36:54.670400', 'step': 735, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:36:54.727380', 'step': 735, 'epoch': 1} +{'type': 'loss', 'content': 0.020502442494034767, 'timestamp': '2025-09-10 02:36:54.738502', 'step': 736, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-09-10 02:36:54.803573', 'step': 736, 'epoch': 1} +{'type': 'loss', 'content': 0.015638506039977074, 'timestamp': '2025-09-10 02:36:54.816701', 'step': 737, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:36:54.870020', 'step': 737, 'epoch': 1} +{'type': 'loss', 'content': 0.00676635792478919, 'timestamp': '2025-09-10 02:36:54.871943', 'step': 738, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:36:54.924485', 'step': 738, 'epoch': 1} +{'type': 'loss', 'content': 0.008608072064816952, 'timestamp': '2025-09-10 02:36:54.927329', 'step': 739, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:36:54.979296', 'step': 739, 'epoch': 1} +{'type': 'loss', 'content': 0.00817218329757452, 'timestamp': '2025-09-10 02:36:54.985279', 'step': 740, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:36:55.037487', 'step': 740, 'epoch': 1} +{'type': 'loss', 'content': 0.023427298292517662, 'timestamp': '2025-09-10 02:36:55.039930', 'step': 741, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:36:55.093074', 'step': 741, 'epoch': 1} +{'type': 'loss', 'content': 0.027461376041173935, 'timestamp': '2025-09-10 02:36:55.095209', 'step': 742, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:36:55.147794', 'step': 742, 'epoch': 1} +{'type': 'loss', 'content': 0.0030104925390332937, 'timestamp': '2025-09-10 02:36:55.149737', 'step': 743, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:36:55.202263', 'step': 743, 'epoch': 1} +{'type': 'loss', 'content': 0.0279100202023983, 'timestamp': '2025-09-10 02:36:55.208136', 'step': 744, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:36:55.260953', 'step': 744, 'epoch': 1} +{'type': 'loss', 'content': 0.009477927349507809, 'timestamp': '2025-09-10 02:36:55.262916', 'step': 745, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:36:55.317341', 'step': 745, 'epoch': 1} +{'type': 'loss', 'content': 0.021144181489944458, 'timestamp': '2025-09-10 02:36:55.327145', 'step': 746, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:36:55.380394', 'step': 746, 'epoch': 1} +{'type': 'loss', 'content': 0.0071835569106042385, 'timestamp': '2025-09-10 02:36:55.382474', 'step': 747, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:36:55.435171', 'step': 747, 'epoch': 1} +{'type': 'loss', 'content': 0.015094847418367863, 'timestamp': '2025-09-10 02:36:55.442413', 'step': 748, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:36:55.494682', 'step': 748, 'epoch': 1} +{'type': 'loss', 'content': 0.014481599442660809, 'timestamp': '2025-09-10 02:36:55.497028', 'step': 749, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:36:55.554558', 'step': 749, 'epoch': 1} +{'type': 'loss', 'content': 0.0032352295238524675, 'timestamp': '2025-09-10 02:36:55.564980', 'step': 750, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:36:55.619465', 'step': 750, 'epoch': 1} +{'type': 'loss', 'content': 0.008319184184074402, 'timestamp': '2025-09-10 02:36:55.621413', 'step': 751, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:36:55.673805', 'step': 751, 'epoch': 1} +{'type': 'loss', 'content': 0.001629153499379754, 'timestamp': '2025-09-10 02:36:55.679541', 'step': 752, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:36:55.732181', 'step': 752, 'epoch': 1} +{'type': 'loss', 'content': 0.013639301992952824, 'timestamp': '2025-09-10 02:36:55.740543', 'step': 753, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:36:55.793714', 'step': 753, 'epoch': 1} +{'type': 'loss', 'content': 0.0051920460537076, 'timestamp': '2025-09-10 02:36:55.802009', 'step': 754, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:36:55.855478', 'step': 754, 'epoch': 1} +{'type': 'loss', 'content': 0.0006103878258727491, 'timestamp': '2025-09-10 02:36:55.858301', 'step': 755, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:36:55.911629', 'step': 755, 'epoch': 1} +{'type': 'loss', 'content': 0.014334793202579021, 'timestamp': '2025-09-10 02:36:55.917623', 'step': 756, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:36:55.969951', 'step': 756, 'epoch': 1} +{'type': 'loss', 'content': 0.01660270243883133, 'timestamp': '2025-09-10 02:36:55.971904', 'step': 757, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 480], 'flops': 9600058345344.0}, 'timestamp': '2025-09-10 02:36:56.045152', 'step': 757, 'epoch': 1} +{'type': 'loss', 'content': 0.03916054591536522, 'timestamp': '2025-09-10 02:36:56.058862', 'step': 758, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:36:56.113331', 'step': 758, 'epoch': 1} +{'type': 'loss', 'content': 0.057445406913757324, 'timestamp': '2025-09-10 02:36:56.123151', 'step': 759, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:36:56.177670', 'step': 759, 'epoch': 1} +{'type': 'loss', 'content': 0.04359531030058861, 'timestamp': '2025-09-10 02:36:56.183533', 'step': 760, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:36:56.234980', 'step': 760, 'epoch': 1} +{'type': 'loss', 'content': 0.016469566151499748, 'timestamp': '2025-09-10 02:36:56.237128', 'step': 761, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:36:56.291946', 'step': 761, 'epoch': 1} +{'type': 'loss', 'content': 0.021483054384589195, 'timestamp': '2025-09-10 02:36:56.301716', 'step': 762, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-09-10 02:36:56.362917', 'step': 762, 'epoch': 1} +{'type': 'loss', 'content': 0.007063302677124739, 'timestamp': '2025-09-10 02:36:56.373986', 'step': 763, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:36:56.427233', 'step': 763, 'epoch': 1} +{'type': 'loss', 'content': 0.007045641075819731, 'timestamp': '2025-09-10 02:36:56.434308', 'step': 764, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:36:56.486772', 'step': 764, 'epoch': 1} +{'type': 'loss', 'content': 0.035644493997097015, 'timestamp': '2025-09-10 02:36:56.488986', 'step': 765, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:36:56.541941', 'step': 765, 'epoch': 1} +{'type': 'loss', 'content': 0.009285603649914265, 'timestamp': '2025-09-10 02:36:56.544150', 'step': 766, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-09-10 02:36:56.610635', 'step': 766, 'epoch': 1} +{'type': 'loss', 'content': 0.0031007337383925915, 'timestamp': '2025-09-10 02:36:56.622836', 'step': 767, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:36:56.675781', 'step': 767, 'epoch': 1} +{'type': 'loss', 'content': 0.009259874001145363, 'timestamp': '2025-09-10 02:36:56.681806', 'step': 768, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:36:56.733740', 'step': 768, 'epoch': 1} +{'type': 'loss', 'content': 0.01143709011375904, 'timestamp': '2025-09-10 02:36:56.736082', 'step': 769, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:36:56.789671', 'step': 769, 'epoch': 1} +{'type': 'loss', 'content': 0.017447659745812416, 'timestamp': '2025-09-10 02:36:56.799278', 'step': 770, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:36:56.852742', 'step': 770, 'epoch': 1} +{'type': 'loss', 'content': 0.01954110898077488, 'timestamp': '2025-09-10 02:36:56.854921', 'step': 771, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:36:56.907448', 'step': 771, 'epoch': 1} +{'type': 'loss', 'content': 0.016713453456759453, 'timestamp': '2025-09-10 02:36:56.913290', 'step': 772, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:36:56.965105', 'step': 772, 'epoch': 1} +{'type': 'loss', 'content': 0.019992290064692497, 'timestamp': '2025-09-10 02:36:56.975387', 'step': 773, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:36:57.028370', 'step': 773, 'epoch': 1} +{'type': 'loss', 'content': 0.00534540181979537, 'timestamp': '2025-09-10 02:36:57.030486', 'step': 774, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:36:57.083622', 'step': 774, 'epoch': 1} +{'type': 'loss', 'content': 0.010700692422688007, 'timestamp': '2025-09-10 02:36:57.085776', 'step': 775, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:36:57.138182', 'step': 775, 'epoch': 1} +{'type': 'loss', 'content': 0.03621980547904968, 'timestamp': '2025-09-10 02:36:57.144186', 'step': 776, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:36:57.196717', 'step': 776, 'epoch': 1} +{'type': 'loss', 'content': 0.01841093972325325, 'timestamp': '2025-09-10 02:36:57.198808', 'step': 777, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:36:57.251782', 'step': 777, 'epoch': 1} +{'type': 'loss', 'content': 0.009724624454975128, 'timestamp': '2025-09-10 02:36:57.259937', 'step': 778, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:36:57.312696', 'step': 778, 'epoch': 1} +{'type': 'loss', 'content': 0.005586319603025913, 'timestamp': '2025-09-10 02:36:57.314785', 'step': 779, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:36:57.368462', 'step': 779, 'epoch': 1} +{'type': 'loss', 'content': 0.012120272032916546, 'timestamp': '2025-09-10 02:36:57.374242', 'step': 780, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:36:57.426180', 'step': 780, 'epoch': 1} +{'type': 'loss', 'content': 0.0129274632781744, 'timestamp': '2025-09-10 02:36:57.429033', 'step': 781, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:36:57.489694', 'step': 781, 'epoch': 1} +{'type': 'loss', 'content': 0.014876616187393665, 'timestamp': '2025-09-10 02:36:57.500443', 'step': 782, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:36:57.554161', 'step': 782, 'epoch': 1} +{'type': 'loss', 'content': 0.014843541197478771, 'timestamp': '2025-09-10 02:36:57.556520', 'step': 783, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-09-10 02:36:57.624567', 'step': 783, 'epoch': 1} +{'type': 'loss', 'content': 0.004676350392401218, 'timestamp': '2025-09-10 02:36:57.637826', 'step': 784, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:36:57.691802', 'step': 784, 'epoch': 1} +{'type': 'loss', 'content': 0.005307146813720465, 'timestamp': '2025-09-10 02:36:57.701408', 'step': 785, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-09-10 02:36:57.768054', 'step': 785, 'epoch': 1} +{'type': 'loss', 'content': 0.03211702033877373, 'timestamp': '2025-09-10 02:36:57.780244', 'step': 786, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:36:57.833669', 'step': 786, 'epoch': 1} +{'type': 'loss', 'content': 0.04485854133963585, 'timestamp': '2025-09-10 02:36:57.835935', 'step': 787, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-09-10 02:36:57.896789', 'step': 787, 'epoch': 1} +{'type': 'loss', 'content': 0.007537003606557846, 'timestamp': '2025-09-10 02:36:57.908482', 'step': 788, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:36:57.961411', 'step': 788, 'epoch': 1} +{'type': 'loss', 'content': 0.003232221584767103, 'timestamp': '2025-09-10 02:36:57.963679', 'step': 789, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:36:58.016910', 'step': 789, 'epoch': 1} +{'type': 'loss', 'content': 0.02600998990237713, 'timestamp': '2025-09-10 02:36:58.019180', 'step': 790, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 528], 'flops': 10560064173120.0}, 'timestamp': '2025-09-10 02:36:58.100801', 'step': 790, 'epoch': 1} +{'type': 'loss', 'content': 0.01314751710742712, 'timestamp': '2025-09-10 02:36:58.115804', 'step': 791, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:36:58.169976', 'step': 791, 'epoch': 1} +{'type': 'loss', 'content': 0.011830405332148075, 'timestamp': '2025-09-10 02:36:58.175921', 'step': 792, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:36:58.228540', 'step': 792, 'epoch': 1} +{'type': 'loss', 'content': 0.01234391424804926, 'timestamp': '2025-09-10 02:36:58.230782', 'step': 793, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:36:58.283727', 'step': 793, 'epoch': 1} +{'type': 'loss', 'content': 0.02767784334719181, 'timestamp': '2025-09-10 02:36:58.285952', 'step': 794, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:36:58.338954', 'step': 794, 'epoch': 1} +{'type': 'loss', 'content': 0.0014593282248824835, 'timestamp': '2025-09-10 02:36:58.341214', 'step': 795, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:36:58.399241', 'step': 795, 'epoch': 1} +{'type': 'loss', 'content': 0.047555647790431976, 'timestamp': '2025-09-10 02:36:58.410435', 'step': 796, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:36:58.469602', 'step': 796, 'epoch': 1} +{'type': 'loss', 'content': 0.01425406988710165, 'timestamp': '2025-09-10 02:36:58.481158', 'step': 797, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:36:58.534786', 'step': 797, 'epoch': 1} +{'type': 'loss', 'content': 0.005846301559358835, 'timestamp': '2025-09-10 02:36:58.536794', 'step': 798, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:36:58.589682', 'step': 798, 'epoch': 1} +{'type': 'loss', 'content': 0.02180892787873745, 'timestamp': '2025-09-10 02:36:58.591800', 'step': 799, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:36:58.645066', 'step': 799, 'epoch': 1} +{'type': 'loss', 'content': 0.01302596740424633, 'timestamp': '2025-09-10 02:36:58.651430', 'step': 800, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:36:58.704127', 'step': 800, 'epoch': 1} +{'type': 'loss', 'content': 0.016773927956819534, 'timestamp': '2025-09-10 02:36:58.706882', 'step': 801, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:36:58.760906', 'step': 801, 'epoch': 1} +{'type': 'loss', 'content': 0.004892075899988413, 'timestamp': '2025-09-10 02:36:58.770673', 'step': 802, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:36:58.824302', 'step': 802, 'epoch': 1} +{'type': 'loss', 'content': 0.002840865170583129, 'timestamp': '2025-09-10 02:36:58.826350', 'step': 803, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:36:58.886195', 'step': 803, 'epoch': 1} +{'type': 'loss', 'content': 0.018173635005950928, 'timestamp': '2025-09-10 02:36:58.897720', 'step': 804, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:36:58.950440', 'step': 804, 'epoch': 1} +{'type': 'loss', 'content': 0.025356922298669815, 'timestamp': '2025-09-10 02:36:58.952766', 'step': 805, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:36:59.006043', 'step': 805, 'epoch': 1} +{'type': 'loss', 'content': 0.0071188295260071754, 'timestamp': '2025-09-10 02:36:59.013674', 'step': 806, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:36:59.066723', 'step': 806, 'epoch': 1} +{'type': 'loss', 'content': 0.0071905506774783134, 'timestamp': '2025-09-10 02:36:59.068774', 'step': 807, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:36:59.121937', 'step': 807, 'epoch': 1} +{'type': 'loss', 'content': 0.010201232507824898, 'timestamp': '2025-09-10 02:36:59.128204', 'step': 808, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:36:59.181164', 'step': 808, 'epoch': 1} +{'type': 'loss', 'content': 0.0006716384668834507, 'timestamp': '2025-09-10 02:36:59.183382', 'step': 809, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:36:59.237100', 'step': 809, 'epoch': 1} +{'type': 'loss', 'content': 0.009294861927628517, 'timestamp': '2025-09-10 02:36:59.239346', 'step': 810, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:36:59.292794', 'step': 810, 'epoch': 1} +{'type': 'loss', 'content': 0.009038268588483334, 'timestamp': '2025-09-10 02:36:59.295129', 'step': 811, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:36:59.355506', 'step': 811, 'epoch': 1} +{'type': 'loss', 'content': 0.01586053892970085, 'timestamp': '2025-09-10 02:36:59.367026', 'step': 812, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:36:59.419974', 'step': 812, 'epoch': 1} +{'type': 'loss', 'content': 0.01941845752298832, 'timestamp': '2025-09-10 02:36:59.422362', 'step': 813, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:36:59.476908', 'step': 813, 'epoch': 1} +{'type': 'loss', 'content': 0.008595914579927921, 'timestamp': '2025-09-10 02:36:59.479281', 'step': 814, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:36:59.533158', 'step': 814, 'epoch': 1} +{'type': 'loss', 'content': 0.002296484773978591, 'timestamp': '2025-09-10 02:36:59.539035', 'step': 815, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:36:59.593264', 'step': 815, 'epoch': 1} +{'type': 'loss', 'content': 0.020253313705325127, 'timestamp': '2025-09-10 02:36:59.599671', 'step': 816, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:36:59.659221', 'step': 816, 'epoch': 1} +{'type': 'loss', 'content': 0.010540309362113476, 'timestamp': '2025-09-10 02:36:59.670736', 'step': 817, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:36:59.728584', 'step': 817, 'epoch': 1} +{'type': 'loss', 'content': 0.022805223241448402, 'timestamp': '2025-09-10 02:36:59.739002', 'step': 818, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:36:59.792947', 'step': 818, 'epoch': 1} +{'type': 'loss', 'content': 0.03727426007390022, 'timestamp': '2025-09-10 02:36:59.798505', 'step': 819, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:36:59.851574', 'step': 819, 'epoch': 1} +{'type': 'loss', 'content': 0.03954698517918587, 'timestamp': '2025-09-10 02:36:59.858025', 'step': 820, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:36:59.910942', 'step': 820, 'epoch': 1} +{'type': 'loss', 'content': 0.020105760544538498, 'timestamp': '2025-09-10 02:36:59.913253', 'step': 821, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:36:59.971654', 'step': 821, 'epoch': 1} +{'type': 'loss', 'content': 0.06767837703227997, 'timestamp': '2025-09-10 02:36:59.974389', 'step': 822, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:37:00.031750', 'step': 822, 'epoch': 1} +{'type': 'loss', 'content': 0.0019293931545689702, 'timestamp': '2025-09-10 02:37:00.033880', 'step': 823, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:37:00.093895', 'step': 823, 'epoch': 1} +{'type': 'loss', 'content': 0.0019679716788232327, 'timestamp': '2025-09-10 02:37:00.100305', 'step': 824, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:37:00.244698', 'step': 824, 'epoch': 1} +{'type': 'loss', 'content': 0.011093164794147015, 'timestamp': '2025-09-10 02:37:00.250234', 'step': 825, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:37:00.303379', 'step': 825, 'epoch': 1} +{'type': 'loss', 'content': 0.039194487035274506, 'timestamp': '2025-09-10 02:37:00.305835', 'step': 826, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:37:00.359015', 'step': 826, 'epoch': 1} +{'type': 'loss', 'content': 0.05020434036850929, 'timestamp': '2025-09-10 02:37:00.361245', 'step': 827, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:37:00.413939', 'step': 827, 'epoch': 1} +{'type': 'loss', 'content': 0.006449654698371887, 'timestamp': '2025-09-10 02:37:00.420116', 'step': 828, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:37:00.472686', 'step': 828, 'epoch': 1} +{'type': 'loss', 'content': 0.007012534886598587, 'timestamp': '2025-09-10 02:37:00.482887', 'step': 829, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:37:00.535772', 'step': 829, 'epoch': 1} +{'type': 'loss', 'content': 0.01749882847070694, 'timestamp': '2025-09-10 02:37:00.538787', 'step': 830, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:37:00.591577', 'step': 830, 'epoch': 1} +{'type': 'loss', 'content': 0.016114160418510437, 'timestamp': '2025-09-10 02:37:00.593718', 'step': 831, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:37:00.652188', 'step': 831, 'epoch': 1} +{'type': 'loss', 'content': 0.028149515390396118, 'timestamp': '2025-09-10 02:37:00.662795', 'step': 832, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:37:00.715253', 'step': 832, 'epoch': 1} +{'type': 'loss', 'content': 0.014135974459350109, 'timestamp': '2025-09-10 02:37:00.721502', 'step': 833, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:37:00.775531', 'step': 833, 'epoch': 1} +{'type': 'loss', 'content': 0.03076109290122986, 'timestamp': '2025-09-10 02:37:00.777582', 'step': 834, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:37:00.830923', 'step': 834, 'epoch': 1} +{'type': 'loss', 'content': 0.03094400279223919, 'timestamp': '2025-09-10 02:37:00.838245', 'step': 835, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:37:00.891325', 'step': 835, 'epoch': 1} +{'type': 'loss', 'content': 0.022063499316573143, 'timestamp': '2025-09-10 02:37:00.897401', 'step': 836, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:37:00.949343', 'step': 836, 'epoch': 1} +{'type': 'loss', 'content': 0.01406343374401331, 'timestamp': '2025-09-10 02:37:00.959603', 'step': 837, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-09-10 02:37:01.027564', 'step': 837, 'epoch': 1} +{'type': 'loss', 'content': 0.03292260691523552, 'timestamp': '2025-09-10 02:37:01.040119', 'step': 838, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:37:01.097583', 'step': 838, 'epoch': 1} +{'type': 'loss', 'content': 0.011248174123466015, 'timestamp': '2025-09-10 02:37:01.099885', 'step': 839, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:37:01.152435', 'step': 839, 'epoch': 1} +{'type': 'loss', 'content': 0.02245539426803589, 'timestamp': '2025-09-10 02:37:01.158608', 'step': 840, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 480], 'flops': 9600058345344.0}, 'timestamp': '2025-09-10 02:37:01.230457', 'step': 840, 'epoch': 1} +{'type': 'loss', 'content': 0.02683345414698124, 'timestamp': '2025-09-10 02:37:01.245446', 'step': 841, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:37:01.298475', 'step': 841, 'epoch': 1} +{'type': 'loss', 'content': 0.011874644085764885, 'timestamp': '2025-09-10 02:37:01.300995', 'step': 842, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:37:01.353922', 'step': 842, 'epoch': 1} +{'type': 'loss', 'content': 0.01341636199504137, 'timestamp': '2025-09-10 02:37:01.356022', 'step': 843, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:37:01.409223', 'step': 843, 'epoch': 1} +{'type': 'loss', 'content': 0.012613373808562756, 'timestamp': '2025-09-10 02:37:01.415037', 'step': 844, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-09-10 02:37:01.481579', 'step': 844, 'epoch': 1} +{'type': 'loss', 'content': 0.019389091059565544, 'timestamp': '2025-09-10 02:37:01.495360', 'step': 845, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:37:01.555560', 'step': 845, 'epoch': 1} +{'type': 'loss', 'content': 0.004535573069006205, 'timestamp': '2025-09-10 02:37:01.566289', 'step': 846, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-09-10 02:37:01.627593', 'step': 846, 'epoch': 1} +{'type': 'loss', 'content': 0.018833627924323082, 'timestamp': '2025-09-10 02:37:01.638696', 'step': 847, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:37:01.693159', 'step': 847, 'epoch': 1} +{'type': 'loss', 'content': 0.027462048456072807, 'timestamp': '2025-09-10 02:37:01.699095', 'step': 848, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:37:01.753480', 'step': 848, 'epoch': 1} +{'type': 'loss', 'content': 0.01780627854168415, 'timestamp': '2025-09-10 02:37:01.756085', 'step': 849, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:37:01.808895', 'step': 849, 'epoch': 1} +{'type': 'loss', 'content': 0.008407698012888432, 'timestamp': '2025-09-10 02:37:01.816648', 'step': 850, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:37:01.869218', 'step': 850, 'epoch': 1} +{'type': 'loss', 'content': 0.006790136452764273, 'timestamp': '2025-09-10 02:37:01.871042', 'step': 851, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:37:01.923983', 'step': 851, 'epoch': 1} +{'type': 'loss', 'content': 0.0250264760106802, 'timestamp': '2025-09-10 02:37:01.931083', 'step': 852, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 480], 'flops': 9600058345344.0}, 'timestamp': '2025-09-10 02:37:02.001951', 'step': 852, 'epoch': 1} +{'type': 'loss', 'content': 0.016610300168395042, 'timestamp': '2025-09-10 02:37:02.016916', 'step': 853, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-09-10 02:37:02.087453', 'step': 853, 'epoch': 1} +{'type': 'loss', 'content': 0.011690507642924786, 'timestamp': '2025-09-10 02:37:02.100061', 'step': 854, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:37:02.153665', 'step': 854, 'epoch': 1} +{'type': 'loss', 'content': 0.007542649749666452, 'timestamp': '2025-09-10 02:37:02.155574', 'step': 855, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:37:02.208037', 'step': 855, 'epoch': 1} +{'type': 'loss', 'content': 0.006887788884341717, 'timestamp': '2025-09-10 02:37:02.214984', 'step': 856, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:37:02.269234', 'step': 856, 'epoch': 1} +{'type': 'loss', 'content': 0.01489509828388691, 'timestamp': '2025-09-10 02:37:02.279187', 'step': 857, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:37:02.331911', 'step': 857, 'epoch': 1} +{'type': 'loss', 'content': 0.01974819228053093, 'timestamp': '2025-09-10 02:37:02.334767', 'step': 858, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:37:02.388192', 'step': 858, 'epoch': 1} +{'type': 'loss', 'content': 0.016354762017726898, 'timestamp': '2025-09-10 02:37:02.392577', 'step': 859, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-09-10 02:37:02.454075', 'step': 859, 'epoch': 1} +{'type': 'loss', 'content': 0.014117122627794743, 'timestamp': '2025-09-10 02:37:02.465955', 'step': 860, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:37:02.522242', 'step': 860, 'epoch': 1} +{'type': 'loss', 'content': 0.007971592247486115, 'timestamp': '2025-09-10 02:37:02.533471', 'step': 861, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:37:02.586745', 'step': 861, 'epoch': 1} +{'type': 'loss', 'content': 0.009552216157317162, 'timestamp': '2025-09-10 02:37:02.589362', 'step': 862, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:37:02.644787', 'step': 862, 'epoch': 1} +{'type': 'loss', 'content': 0.01481309998780489, 'timestamp': '2025-09-10 02:37:02.647258', 'step': 863, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:37:02.700522', 'step': 863, 'epoch': 1} +{'type': 'loss', 'content': 0.017297828570008278, 'timestamp': '2025-09-10 02:37:02.712358', 'step': 864, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:37:02.785195', 'step': 864, 'epoch': 1} +{'type': 'loss', 'content': 0.011291074566543102, 'timestamp': '2025-09-10 02:37:02.793106', 'step': 865, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:37:02.846382', 'step': 865, 'epoch': 1} +{'type': 'loss', 'content': 0.026969680562615395, 'timestamp': '2025-09-10 02:37:02.848743', 'step': 866, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:37:02.901452', 'step': 866, 'epoch': 1} +{'type': 'loss', 'content': 0.018329216167330742, 'timestamp': '2025-09-10 02:37:02.905913', 'step': 867, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:37:02.960839', 'step': 867, 'epoch': 1} +{'type': 'loss', 'content': 0.007551748771220446, 'timestamp': '2025-09-10 02:37:02.966869', 'step': 868, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:37:03.027071', 'step': 868, 'epoch': 1} +{'type': 'loss', 'content': 0.005394156090915203, 'timestamp': '2025-09-10 02:37:03.030241', 'step': 869, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:37:03.086559', 'step': 869, 'epoch': 1} +{'type': 'loss', 'content': 0.005307991988956928, 'timestamp': '2025-09-10 02:37:03.088785', 'step': 870, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:37:03.145690', 'step': 870, 'epoch': 1} +{'type': 'loss', 'content': 0.027891067788004875, 'timestamp': '2025-09-10 02:37:03.148665', 'step': 871, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:37:03.201190', 'step': 871, 'epoch': 1} +{'type': 'loss', 'content': 0.010209517553448677, 'timestamp': '2025-09-10 02:37:03.207287', 'step': 872, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:37:03.264926', 'step': 872, 'epoch': 1} +{'type': 'loss', 'content': 0.0062334248796105385, 'timestamp': '2025-09-10 02:37:03.267165', 'step': 873, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:37:03.325015', 'step': 873, 'epoch': 1} +{'type': 'loss', 'content': 0.02778015471994877, 'timestamp': '2025-09-10 02:37:03.335485', 'step': 874, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:37:03.390570', 'step': 874, 'epoch': 1} +{'type': 'loss', 'content': 0.012220826931297779, 'timestamp': '2025-09-10 02:37:03.392849', 'step': 875, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:37:03.446034', 'step': 875, 'epoch': 1} +{'type': 'loss', 'content': 0.014624446630477905, 'timestamp': '2025-09-10 02:37:03.452251', 'step': 876, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:37:03.503993', 'step': 876, 'epoch': 1} +{'type': 'loss', 'content': 0.0029194417875260115, 'timestamp': '2025-09-10 02:37:03.506114', 'step': 877, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:37:03.558711', 'step': 877, 'epoch': 1} +{'type': 'loss', 'content': 0.01652137003839016, 'timestamp': '2025-09-10 02:37:03.560798', 'step': 878, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:37:03.613556', 'step': 878, 'epoch': 1} +{'type': 'loss', 'content': 0.010671539232134819, 'timestamp': '2025-09-10 02:37:03.620010', 'step': 879, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:37:03.672696', 'step': 879, 'epoch': 1} +{'type': 'loss', 'content': 0.02381194569170475, 'timestamp': '2025-09-10 02:37:03.678330', 'step': 880, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:37:03.731137', 'step': 880, 'epoch': 1} +{'type': 'loss', 'content': 0.022047102451324463, 'timestamp': '2025-09-10 02:37:03.741622', 'step': 881, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:37:03.794138', 'step': 881, 'epoch': 1} +{'type': 'loss', 'content': 0.015093098394572735, 'timestamp': '2025-09-10 02:37:03.802377', 'step': 882, 'epoch': 1} +{'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 736], 'batch_size': 8, 'flops': 14691612894976}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10859018244352}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 624], 'batch_size': 8, 'flops': 12455932682112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11497784019456}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8623338031488}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8623338031488}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10859018244352}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 656], 'batch_size': 8, 'flops': 13094698457216}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 560], 'batch_size': 8, 'flops': 11178401131904}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11497784019456}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 1168], 'batch_size': 8, 'flops': 23314950858880}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 640], 'batch_size': 8, 'flops': 12775315569664}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [6, 208], 'batch_size': 8, 'flops': 4151977605760}], 'timestamp': '2025-09-10 02:37:20.651392', 'step': 882, 'epoch': 1} +{'type': 'pplx', 'content': 20869469.391494222, 'timestamp': '2025-09-10 02:37:20.654413', 'step': 882, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:37:20.714133', 'step': 882, 'epoch': 1} +{'type': 'loss', 'content': 0.019753316417336464, 'timestamp': '2025-09-10 02:37:20.717175', 'step': 883, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:37:20.772666', 'step': 883, 'epoch': 1} +{'type': 'loss', 'content': 0.010014870204031467, 'timestamp': '2025-09-10 02:37:20.779348', 'step': 884, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:37:20.833800', 'step': 884, 'epoch': 1} +{'type': 'loss', 'content': 0.0305818822234869, 'timestamp': '2025-09-10 02:37:20.839175', 'step': 885, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:37:20.892884', 'step': 885, 'epoch': 1} +{'type': 'loss', 'content': 0.011537541635334492, 'timestamp': '2025-09-10 02:37:20.895402', 'step': 886, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 784], 'flops': 15680095254592.0}, 'timestamp': '2025-09-10 02:37:21.015383', 'step': 886, 'epoch': 1} +{'type': 'loss', 'content': 0.028522804379463196, 'timestamp': '2025-09-10 02:37:21.037537', 'step': 887, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:37:21.094957', 'step': 887, 'epoch': 1} +{'type': 'loss', 'content': 0.023994384333491325, 'timestamp': '2025-09-10 02:37:21.105517', 'step': 888, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:37:21.158292', 'step': 888, 'epoch': 1} +{'type': 'loss', 'content': 0.014724929817020893, 'timestamp': '2025-09-10 02:37:21.162711', 'step': 889, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:37:21.215691', 'step': 889, 'epoch': 1} +{'type': 'loss', 'content': 0.019524620845913887, 'timestamp': '2025-09-10 02:37:21.223557', 'step': 890, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:37:21.284604', 'step': 890, 'epoch': 1} +{'type': 'loss', 'content': 0.00858976598829031, 'timestamp': '2025-09-10 02:37:21.290750', 'step': 891, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-09-10 02:37:21.359319', 'step': 891, 'epoch': 1} +{'type': 'loss', 'content': 0.013474213890731335, 'timestamp': '2025-09-10 02:37:21.372772', 'step': 892, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:37:21.426134', 'step': 892, 'epoch': 1} +{'type': 'loss', 'content': 0.04169805720448494, 'timestamp': '2025-09-10 02:37:21.428309', 'step': 893, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:37:21.489707', 'step': 893, 'epoch': 1} +{'type': 'loss', 'content': 0.021386364474892616, 'timestamp': '2025-09-10 02:37:21.492533', 'step': 894, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:37:21.546895', 'step': 894, 'epoch': 1} +{'type': 'loss', 'content': 0.0269964300096035, 'timestamp': '2025-09-10 02:37:21.549664', 'step': 895, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:37:21.603419', 'step': 895, 'epoch': 1} +{'type': 'loss', 'content': 0.007117710076272488, 'timestamp': '2025-09-10 02:37:21.612022', 'step': 896, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:37:21.664638', 'step': 896, 'epoch': 1} +{'type': 'loss', 'content': 0.034459445625543594, 'timestamp': '2025-09-10 02:37:21.667360', 'step': 897, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:37:21.727701', 'step': 897, 'epoch': 1} +{'type': 'loss', 'content': 0.0017979221884161234, 'timestamp': '2025-09-10 02:37:21.730022', 'step': 898, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:37:21.782700', 'step': 898, 'epoch': 1} +{'type': 'loss', 'content': 0.01059913169592619, 'timestamp': '2025-09-10 02:37:21.785460', 'step': 899, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:37:21.838584', 'step': 899, 'epoch': 1} +{'type': 'loss', 'content': 0.02367735095322132, 'timestamp': '2025-09-10 02:37:21.844528', 'step': 900, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:37:21.896560', 'step': 900, 'epoch': 1} +{'type': 'loss', 'content': 0.004368102643638849, 'timestamp': '2025-09-10 02:37:21.898575', 'step': 901, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:37:21.950851', 'step': 901, 'epoch': 1} +{'type': 'loss', 'content': 0.015609510242938995, 'timestamp': '2025-09-10 02:37:21.953668', 'step': 902, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:37:22.009153', 'step': 902, 'epoch': 1} +{'type': 'loss', 'content': 0.03799451142549515, 'timestamp': '2025-09-10 02:37:22.014516', 'step': 903, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-09-10 02:37:22.083055', 'step': 903, 'epoch': 1} +{'type': 'loss', 'content': 0.0022819351870566607, 'timestamp': '2025-09-10 02:37:22.096019', 'step': 904, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:37:22.158777', 'step': 904, 'epoch': 1} +{'type': 'loss', 'content': 0.003602777374908328, 'timestamp': '2025-09-10 02:37:22.164120', 'step': 905, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:37:22.217786', 'step': 905, 'epoch': 1} +{'type': 'loss', 'content': 0.02644296921789646, 'timestamp': '2025-09-10 02:37:22.220013', 'step': 906, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 496], 'flops': 9920060287936.0}, 'timestamp': '2025-09-10 02:37:22.294376', 'step': 906, 'epoch': 1} +{'type': 'loss', 'content': 0.030447587370872498, 'timestamp': '2025-09-10 02:37:22.308275', 'step': 907, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:37:22.369152', 'step': 907, 'epoch': 1} +{'type': 'loss', 'content': 0.005110509227961302, 'timestamp': '2025-09-10 02:37:22.380693', 'step': 908, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:37:22.434012', 'step': 908, 'epoch': 1} +{'type': 'loss', 'content': 0.019296277314424515, 'timestamp': '2025-09-10 02:37:22.436062', 'step': 909, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:37:22.489479', 'step': 909, 'epoch': 1} +{'type': 'loss', 'content': 0.004453285131603479, 'timestamp': '2025-09-10 02:37:22.491523', 'step': 910, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:37:22.544779', 'step': 910, 'epoch': 1} +{'type': 'loss', 'content': 0.0037371909711509943, 'timestamp': '2025-09-10 02:37:22.546869', 'step': 911, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:37:22.600931', 'step': 911, 'epoch': 1} +{'type': 'loss', 'content': 0.01771688647568226, 'timestamp': '2025-09-10 02:37:22.607251', 'step': 912, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:37:22.661913', 'step': 912, 'epoch': 1} +{'type': 'loss', 'content': 0.012563616037368774, 'timestamp': '2025-09-10 02:37:22.664087', 'step': 913, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:37:22.718792', 'step': 913, 'epoch': 1} +{'type': 'loss', 'content': 0.011310129426419735, 'timestamp': '2025-09-10 02:37:22.720987', 'step': 914, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:37:22.774989', 'step': 914, 'epoch': 1} +{'type': 'loss', 'content': 0.016410497948527336, 'timestamp': '2025-09-10 02:37:22.777121', 'step': 915, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:37:22.830237', 'step': 915, 'epoch': 1} +{'type': 'loss', 'content': 0.006099455989897251, 'timestamp': '2025-09-10 02:37:22.836281', 'step': 916, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:37:22.888958', 'step': 916, 'epoch': 1} +{'type': 'loss', 'content': 0.01592996157705784, 'timestamp': '2025-09-10 02:37:22.891053', 'step': 917, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:37:22.943212', 'step': 917, 'epoch': 1} +{'type': 'loss', 'content': 0.01166507601737976, 'timestamp': '2025-09-10 02:37:22.946000', 'step': 918, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:37:22.999597', 'step': 918, 'epoch': 1} +{'type': 'loss', 'content': 0.004178749863058329, 'timestamp': '2025-09-10 02:37:23.002301', 'step': 919, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:37:23.055315', 'step': 919, 'epoch': 1} +{'type': 'loss', 'content': 0.0035687366034835577, 'timestamp': '2025-09-10 02:37:23.061499', 'step': 920, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:37:23.117005', 'step': 920, 'epoch': 1} +{'type': 'loss', 'content': 0.0027293937746435404, 'timestamp': '2025-09-10 02:37:23.119196', 'step': 921, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:37:23.173803', 'step': 921, 'epoch': 1} +{'type': 'loss', 'content': 0.002716184128075838, 'timestamp': '2025-09-10 02:37:23.183136', 'step': 922, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:37:23.238202', 'step': 922, 'epoch': 1} +{'type': 'loss', 'content': 0.01797613315284252, 'timestamp': '2025-09-10 02:37:23.244612', 'step': 923, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:37:23.298621', 'step': 923, 'epoch': 1} +{'type': 'loss', 'content': 0.013181501068174839, 'timestamp': '2025-09-10 02:37:23.304722', 'step': 924, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:37:23.371944', 'step': 924, 'epoch': 1} +{'type': 'loss', 'content': 0.010413519106805325, 'timestamp': '2025-09-10 02:37:23.377827', 'step': 925, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-09-10 02:37:23.441420', 'step': 925, 'epoch': 1} +{'type': 'loss', 'content': 0.00984834972769022, 'timestamp': '2025-09-10 02:37:23.454547', 'step': 926, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:37:23.523230', 'step': 926, 'epoch': 1} +{'type': 'loss', 'content': 0.02834111824631691, 'timestamp': '2025-09-10 02:37:23.527975', 'step': 927, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:37:23.600815', 'step': 927, 'epoch': 1} +{'type': 'loss', 'content': 0.005702574271708727, 'timestamp': '2025-09-10 02:37:23.611012', 'step': 928, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:37:23.669715', 'step': 928, 'epoch': 1} +{'type': 'loss', 'content': 0.024012399837374687, 'timestamp': '2025-09-10 02:37:23.675668', 'step': 929, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:37:23.737279', 'step': 929, 'epoch': 1} +{'type': 'loss', 'content': 0.019258147105574608, 'timestamp': '2025-09-10 02:37:23.743841', 'step': 930, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:37:23.815044', 'step': 930, 'epoch': 1} +{'type': 'loss', 'content': 0.0035729738883674145, 'timestamp': '2025-09-10 02:37:23.824841', 'step': 931, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:37:23.881800', 'step': 931, 'epoch': 1} +{'type': 'loss', 'content': 0.013548379763960838, 'timestamp': '2025-09-10 02:37:23.895440', 'step': 932, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:37:23.955157', 'step': 932, 'epoch': 1} +{'type': 'loss', 'content': 0.021062707528471947, 'timestamp': '2025-09-10 02:37:23.962403', 'step': 933, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:37:24.017783', 'step': 933, 'epoch': 1} +{'type': 'loss', 'content': 0.0214505884796381, 'timestamp': '2025-09-10 02:37:24.027741', 'step': 934, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:37:24.087759', 'step': 934, 'epoch': 1} +{'type': 'loss', 'content': 0.020234962925314903, 'timestamp': '2025-09-10 02:37:24.091438', 'step': 935, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:37:24.154593', 'step': 935, 'epoch': 1} +{'type': 'loss', 'content': 0.007375818677246571, 'timestamp': '2025-09-10 02:37:24.164577', 'step': 936, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:37:24.231528', 'step': 936, 'epoch': 1} +{'type': 'loss', 'content': 0.03821520134806633, 'timestamp': '2025-09-10 02:37:24.239250', 'step': 937, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:37:24.308413', 'step': 937, 'epoch': 1} +{'type': 'loss', 'content': 0.001402406021952629, 'timestamp': '2025-09-10 02:37:24.310546', 'step': 938, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:37:24.364572', 'step': 938, 'epoch': 1} +{'type': 'loss', 'content': 0.0036980025470256805, 'timestamp': '2025-09-10 02:37:24.366720', 'step': 939, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:37:24.419885', 'step': 939, 'epoch': 1} +{'type': 'loss', 'content': 0.020188165828585625, 'timestamp': '2025-09-10 02:37:24.426174', 'step': 940, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-09-10 02:37:24.491994', 'step': 940, 'epoch': 1} +{'type': 'loss', 'content': 0.0015013186493888497, 'timestamp': '2025-09-10 02:37:24.505646', 'step': 941, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:37:24.559282', 'step': 941, 'epoch': 1} +{'type': 'loss', 'content': 0.005488789640367031, 'timestamp': '2025-09-10 02:37:24.561430', 'step': 942, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:37:24.614292', 'step': 942, 'epoch': 1} +{'type': 'loss', 'content': 0.015114936046302319, 'timestamp': '2025-09-10 02:37:24.617117', 'step': 943, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:37:24.670318', 'step': 943, 'epoch': 1} +{'type': 'loss', 'content': 0.01635124906897545, 'timestamp': '2025-09-10 02:37:24.676386', 'step': 944, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:37:24.728570', 'step': 944, 'epoch': 1} +{'type': 'loss', 'content': 0.02673029527068138, 'timestamp': '2025-09-10 02:37:24.730783', 'step': 945, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:37:24.783898', 'step': 945, 'epoch': 1} +{'type': 'loss', 'content': 0.023836245760321617, 'timestamp': '2025-09-10 02:37:24.785892', 'step': 946, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:37:24.838782', 'step': 946, 'epoch': 1} +{'type': 'loss', 'content': 0.012506467290222645, 'timestamp': '2025-09-10 02:37:24.840983', 'step': 947, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:37:24.893496', 'step': 947, 'epoch': 1} +{'type': 'loss', 'content': 0.003875403432175517, 'timestamp': '2025-09-10 02:37:24.899656', 'step': 948, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:37:24.951520', 'step': 948, 'epoch': 1} +{'type': 'loss', 'content': 0.017567379400134087, 'timestamp': '2025-09-10 02:37:24.954324', 'step': 949, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:37:25.006905', 'step': 949, 'epoch': 1} +{'type': 'loss', 'content': 0.00482145743444562, 'timestamp': '2025-09-10 02:37:25.009063', 'step': 950, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:37:25.061710', 'step': 950, 'epoch': 1} +{'type': 'loss', 'content': 0.02905195951461792, 'timestamp': '2025-09-10 02:37:25.064468', 'step': 951, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:37:25.117589', 'step': 951, 'epoch': 1} +{'type': 'loss', 'content': 0.005482594482600689, 'timestamp': '2025-09-10 02:37:25.126403', 'step': 952, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:37:25.179035', 'step': 952, 'epoch': 1} +{'type': 'loss', 'content': 0.01592409238219261, 'timestamp': '2025-09-10 02:37:25.181007', 'step': 953, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:37:25.233815', 'step': 953, 'epoch': 1} +{'type': 'loss', 'content': 0.024157240986824036, 'timestamp': '2025-09-10 02:37:25.240279', 'step': 954, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:37:25.297972', 'step': 954, 'epoch': 1} +{'type': 'loss', 'content': 0.021483780816197395, 'timestamp': '2025-09-10 02:37:25.308401', 'step': 955, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:37:25.361147', 'step': 955, 'epoch': 1} +{'type': 'loss', 'content': 0.006510969717055559, 'timestamp': '2025-09-10 02:37:25.368372', 'step': 956, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:37:25.420754', 'step': 956, 'epoch': 1} +{'type': 'loss', 'content': 0.030279411002993584, 'timestamp': '2025-09-10 02:37:25.422817', 'step': 957, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:37:25.475686', 'step': 957, 'epoch': 1} +{'type': 'loss', 'content': 0.018104463815689087, 'timestamp': '2025-09-10 02:37:25.477827', 'step': 958, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:37:25.530938', 'step': 958, 'epoch': 1} +{'type': 'loss', 'content': 0.05846955254673958, 'timestamp': '2025-09-10 02:37:25.533073', 'step': 959, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:37:25.585675', 'step': 959, 'epoch': 1} +{'type': 'loss', 'content': 0.0069233691319823265, 'timestamp': '2025-09-10 02:37:25.591570', 'step': 960, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:37:25.644135', 'step': 960, 'epoch': 1} +{'type': 'loss', 'content': 0.02537854015827179, 'timestamp': '2025-09-10 02:37:25.646168', 'step': 961, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-09-10 02:37:25.707005', 'step': 961, 'epoch': 1} +{'type': 'loss', 'content': 0.004002484958618879, 'timestamp': '2025-09-10 02:37:25.718112', 'step': 962, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:37:25.772178', 'step': 962, 'epoch': 1} +{'type': 'loss', 'content': 0.058410581201314926, 'timestamp': '2025-09-10 02:37:25.781784', 'step': 963, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-09-10 02:37:25.842984', 'step': 963, 'epoch': 1} +{'type': 'loss', 'content': 0.005029276013374329, 'timestamp': '2025-09-10 02:37:25.854695', 'step': 964, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:37:25.907547', 'step': 964, 'epoch': 1} +{'type': 'loss', 'content': 0.018447445705533028, 'timestamp': '2025-09-10 02:37:25.909777', 'step': 965, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:37:25.963575', 'step': 965, 'epoch': 1} +{'type': 'loss', 'content': 0.024119997397065163, 'timestamp': '2025-09-10 02:37:25.965886', 'step': 966, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:37:26.019108', 'step': 966, 'epoch': 1} +{'type': 'loss', 'content': 0.0029526797588914633, 'timestamp': '2025-09-10 02:37:26.021461', 'step': 967, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:37:26.075812', 'step': 967, 'epoch': 1} +{'type': 'loss', 'content': 0.011708670295774937, 'timestamp': '2025-09-10 02:37:26.086371', 'step': 968, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 576], 'flops': 11520070000896.0}, 'timestamp': '2025-09-10 02:37:26.167437', 'step': 968, 'epoch': 1} +{'type': 'loss', 'content': 0.010926509276032448, 'timestamp': '2025-09-10 02:37:26.184488', 'step': 969, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:37:26.237659', 'step': 969, 'epoch': 1} +{'type': 'loss', 'content': 0.007356074173003435, 'timestamp': '2025-09-10 02:37:26.243695', 'step': 970, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:37:26.296984', 'step': 970, 'epoch': 1} +{'type': 'loss', 'content': 0.009588902816176414, 'timestamp': '2025-09-10 02:37:26.299088', 'step': 971, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:37:26.352177', 'step': 971, 'epoch': 1} +{'type': 'loss', 'content': 0.004068955313414335, 'timestamp': '2025-09-10 02:37:26.361729', 'step': 972, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:37:26.420236', 'step': 972, 'epoch': 1} +{'type': 'loss', 'content': 0.01241745799779892, 'timestamp': '2025-09-10 02:37:26.431781', 'step': 973, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:37:26.484972', 'step': 973, 'epoch': 1} +{'type': 'loss', 'content': 0.0047929794527590275, 'timestamp': '2025-09-10 02:37:26.486955', 'step': 974, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:37:26.539958', 'step': 974, 'epoch': 1} +{'type': 'loss', 'content': 0.01755589060485363, 'timestamp': '2025-09-10 02:37:26.541950', 'step': 975, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:37:26.595109', 'step': 975, 'epoch': 1} +{'type': 'loss', 'content': 0.034460101276636124, 'timestamp': '2025-09-10 02:37:26.601124', 'step': 976, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:37:26.653233', 'step': 976, 'epoch': 1} +{'type': 'loss', 'content': 0.030974719673395157, 'timestamp': '2025-09-10 02:37:26.655258', 'step': 977, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:37:26.707649', 'step': 977, 'epoch': 1} +{'type': 'loss', 'content': 0.016706557944417, 'timestamp': '2025-09-10 02:37:26.709553', 'step': 978, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:37:26.762118', 'step': 978, 'epoch': 1} +{'type': 'loss', 'content': 0.011854681186378002, 'timestamp': '2025-09-10 02:37:26.764060', 'step': 979, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:37:26.817488', 'step': 979, 'epoch': 1} +{'type': 'loss', 'content': 0.012618793174624443, 'timestamp': '2025-09-10 02:37:26.824345', 'step': 980, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:37:26.876718', 'step': 980, 'epoch': 1} +{'type': 'loss', 'content': 0.009645439684391022, 'timestamp': '2025-09-10 02:37:26.878784', 'step': 981, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:37:26.931714', 'step': 981, 'epoch': 1} +{'type': 'loss', 'content': 0.020316239446401596, 'timestamp': '2025-09-10 02:37:26.939923', 'step': 982, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:37:26.992894', 'step': 982, 'epoch': 1} +{'type': 'loss', 'content': 0.01200488768517971, 'timestamp': '2025-09-10 02:37:26.994896', 'step': 983, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:37:27.046853', 'step': 983, 'epoch': 1} +{'type': 'loss', 'content': 0.00738444784656167, 'timestamp': '2025-09-10 02:37:27.052549', 'step': 984, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:37:27.103948', 'step': 984, 'epoch': 1} +{'type': 'loss', 'content': 0.006664204876869917, 'timestamp': '2025-09-10 02:37:27.106095', 'step': 985, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:37:27.174167', 'step': 985, 'epoch': 1} +{'type': 'loss', 'content': 0.02430642768740654, 'timestamp': '2025-09-10 02:37:27.176463', 'step': 986, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:37:27.233614', 'step': 986, 'epoch': 1} +{'type': 'loss', 'content': 0.020492246374487877, 'timestamp': '2025-09-10 02:37:27.238808', 'step': 987, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:37:27.297906', 'step': 987, 'epoch': 1} +{'type': 'loss', 'content': 0.0061102090403437614, 'timestamp': '2025-09-10 02:37:27.309121', 'step': 988, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:37:27.360981', 'step': 988, 'epoch': 1} +{'type': 'loss', 'content': 0.007920605130493641, 'timestamp': '2025-09-10 02:37:27.363154', 'step': 989, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:37:27.416351', 'step': 989, 'epoch': 1} +{'type': 'loss', 'content': 0.03330426290631294, 'timestamp': '2025-09-10 02:37:27.425938', 'step': 990, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:37:27.478779', 'step': 990, 'epoch': 1} +{'type': 'loss', 'content': 0.028603479266166687, 'timestamp': '2025-09-10 02:37:27.481062', 'step': 991, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:37:27.533716', 'step': 991, 'epoch': 1} +{'type': 'loss', 'content': 0.015076139941811562, 'timestamp': '2025-09-10 02:37:27.539522', 'step': 992, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:37:27.591893', 'step': 992, 'epoch': 1} +{'type': 'loss', 'content': 0.006608594208955765, 'timestamp': '2025-09-10 02:37:27.593844', 'step': 993, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:37:27.646415', 'step': 993, 'epoch': 1} +{'type': 'loss', 'content': 0.056671105325222015, 'timestamp': '2025-09-10 02:37:27.648443', 'step': 994, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:37:27.700978', 'step': 994, 'epoch': 1} +{'type': 'loss', 'content': 0.005207512062042952, 'timestamp': '2025-09-10 02:37:27.703927', 'step': 995, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:37:27.756783', 'step': 995, 'epoch': 1} +{'type': 'loss', 'content': 0.028034314513206482, 'timestamp': '2025-09-10 02:37:27.762490', 'step': 996, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:37:27.814890', 'step': 996, 'epoch': 1} +{'type': 'loss', 'content': 0.012034551240503788, 'timestamp': '2025-09-10 02:37:27.825139', 'step': 997, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:37:27.878250', 'step': 997, 'epoch': 1} +{'type': 'loss', 'content': 0.0023099612444639206, 'timestamp': '2025-09-10 02:37:27.880505', 'step': 998, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:37:27.933345', 'step': 998, 'epoch': 1} +{'type': 'loss', 'content': 0.007459663785994053, 'timestamp': '2025-09-10 02:37:27.935742', 'step': 999, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:37:27.994504', 'step': 999, 'epoch': 1} +{'type': 'loss', 'content': 0.02349942922592163, 'timestamp': '2025-09-10 02:37:28.005088', 'step': 1000, 'epoch': 1} +{'type': 'info', 'content': 'Checkpoint saved at step 1000', 'timestamp': '2025-09-10 02:37:28.489240', 'step': 1000, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:37:28.543458', 'step': 1000, 'epoch': 1} +{'type': 'loss', 'content': 0.014669693075120449, 'timestamp': '2025-09-10 02:37:28.545761', 'step': 1001, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:37:28.598898', 'step': 1001, 'epoch': 1} +{'type': 'loss', 'content': 0.028776686638593674, 'timestamp': '2025-09-10 02:37:28.608422', 'step': 1002, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:37:28.662115', 'step': 1002, 'epoch': 1} +{'type': 'loss', 'content': 0.03464367985725403, 'timestamp': '2025-09-10 02:37:28.664138', 'step': 1003, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:37:28.716945', 'step': 1003, 'epoch': 1} +{'type': 'loss', 'content': 0.024242648854851723, 'timestamp': '2025-09-10 02:37:28.724224', 'step': 1004, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:37:28.786540', 'step': 1004, 'epoch': 1} +{'type': 'loss', 'content': 0.0038966622669249773, 'timestamp': '2025-09-10 02:37:28.792975', 'step': 1005, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:37:28.845998', 'step': 1005, 'epoch': 1} +{'type': 'loss', 'content': 0.01966005750000477, 'timestamp': '2025-09-10 02:37:28.848076', 'step': 1006, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:37:28.900502', 'step': 1006, 'epoch': 1} +{'type': 'loss', 'content': 0.0049477508291602135, 'timestamp': '2025-09-10 02:37:28.902574', 'step': 1007, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:37:28.954704', 'step': 1007, 'epoch': 1} +{'type': 'loss', 'content': 0.00921022891998291, 'timestamp': '2025-09-10 02:37:28.961234', 'step': 1008, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:37:29.016114', 'step': 1008, 'epoch': 1} +{'type': 'loss', 'content': 0.02026495151221752, 'timestamp': '2025-09-10 02:37:29.028497', 'step': 1009, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:37:29.081111', 'step': 1009, 'epoch': 1} +{'type': 'loss', 'content': 0.03712758794426918, 'timestamp': '2025-09-10 02:37:29.086151', 'step': 1010, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:37:29.141777', 'step': 1010, 'epoch': 1} +{'type': 'loss', 'content': 0.012790190987288952, 'timestamp': '2025-09-10 02:37:29.148091', 'step': 1011, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:37:29.206205', 'step': 1011, 'epoch': 1} +{'type': 'loss', 'content': 0.016143744811415672, 'timestamp': '2025-09-10 02:37:29.211943', 'step': 1012, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:37:29.268079', 'step': 1012, 'epoch': 1} +{'type': 'loss', 'content': 0.012261556461453438, 'timestamp': '2025-09-10 02:37:29.271480', 'step': 1013, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:37:29.325039', 'step': 1013, 'epoch': 1} +{'type': 'loss', 'content': 0.005705040879547596, 'timestamp': '2025-09-10 02:37:29.327039', 'step': 1014, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:37:29.381028', 'step': 1014, 'epoch': 1} +{'type': 'loss', 'content': 0.009582250379025936, 'timestamp': '2025-09-10 02:37:29.389697', 'step': 1015, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:37:29.446588', 'step': 1015, 'epoch': 1} +{'type': 'loss', 'content': 0.026290949434041977, 'timestamp': '2025-09-10 02:37:29.452465', 'step': 1016, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:37:29.504316', 'step': 1016, 'epoch': 1} +{'type': 'loss', 'content': 0.01569192297756672, 'timestamp': '2025-09-10 02:37:29.509140', 'step': 1017, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:37:29.561622', 'step': 1017, 'epoch': 1} +{'type': 'loss', 'content': 0.005800510756671429, 'timestamp': '2025-09-10 02:37:29.569727', 'step': 1018, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-09-10 02:37:29.637230', 'step': 1018, 'epoch': 1} +{'type': 'loss', 'content': 0.02611362375319004, 'timestamp': '2025-09-10 02:37:29.649826', 'step': 1019, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:37:29.702667', 'step': 1019, 'epoch': 1} +{'type': 'loss', 'content': 0.00921781174838543, 'timestamp': '2025-09-10 02:37:29.708460', 'step': 1020, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:37:29.761153', 'step': 1020, 'epoch': 1} +{'type': 'loss', 'content': 0.028634995222091675, 'timestamp': '2025-09-10 02:37:29.763343', 'step': 1021, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:37:29.820325', 'step': 1021, 'epoch': 1} +{'type': 'loss', 'content': 0.02150345966219902, 'timestamp': '2025-09-10 02:37:29.822418', 'step': 1022, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:37:29.876888', 'step': 1022, 'epoch': 1} +{'type': 'loss', 'content': 0.025369632989168167, 'timestamp': '2025-09-10 02:37:29.886640', 'step': 1023, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-09-10 02:37:29.948450', 'step': 1023, 'epoch': 1} +{'type': 'loss', 'content': 0.023534994572401047, 'timestamp': '2025-09-10 02:37:29.960345', 'step': 1024, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-09-10 02:37:30.026379', 'step': 1024, 'epoch': 1} +{'type': 'loss', 'content': 0.012249263003468513, 'timestamp': '2025-09-10 02:37:30.040031', 'step': 1025, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:37:30.097662', 'step': 1025, 'epoch': 1} +{'type': 'loss', 'content': 0.019058462232351303, 'timestamp': '2025-09-10 02:37:30.108096', 'step': 1026, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:37:30.164934', 'step': 1026, 'epoch': 1} +{'type': 'loss', 'content': 0.007724892348051071, 'timestamp': '2025-09-10 02:37:30.174571', 'step': 1027, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-09-10 02:37:30.247665', 'step': 1027, 'epoch': 1} +{'type': 'loss', 'content': 0.012001357972621918, 'timestamp': '2025-09-10 02:37:30.259580', 'step': 1028, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:37:30.312582', 'step': 1028, 'epoch': 1} +{'type': 'loss', 'content': 0.005957506131380796, 'timestamp': '2025-09-10 02:37:30.320781', 'step': 1029, 'epoch': 1} +{'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 736], 'batch_size': 8, 'flops': 14691612894976}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10859018244352}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 624], 'batch_size': 8, 'flops': 12455932682112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11497784019456}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8623338031488}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8623338031488}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10859018244352}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 656], 'batch_size': 8, 'flops': 13094698457216}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 560], 'batch_size': 8, 'flops': 11178401131904}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11497784019456}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 1168], 'batch_size': 8, 'flops': 23314950858880}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 640], 'batch_size': 8, 'flops': 12775315569664}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [6, 208], 'batch_size': 8, 'flops': 4151977605760}], 'timestamp': '2025-09-10 02:37:47.206926', 'step': 1029, 'epoch': 1} +{'type': 'pplx', 'content': 24619440.02975512, 'timestamp': '2025-09-10 02:37:47.209600', 'step': 1029, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:37:47.263771', 'step': 1029, 'epoch': 1} +{'type': 'loss', 'content': 0.02884192392230034, 'timestamp': '2025-09-10 02:37:47.267627', 'step': 1030, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:37:47.320940', 'step': 1030, 'epoch': 1} +{'type': 'loss', 'content': 0.011000215075910091, 'timestamp': '2025-09-10 02:37:47.322776', 'step': 1031, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:37:47.378122', 'step': 1031, 'epoch': 1} +{'type': 'loss', 'content': 0.011350167915225029, 'timestamp': '2025-09-10 02:37:47.384860', 'step': 1032, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:37:47.444205', 'step': 1032, 'epoch': 1} +{'type': 'loss', 'content': 0.01561619620770216, 'timestamp': '2025-09-10 02:37:47.446115', 'step': 1033, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:37:47.498539', 'step': 1033, 'epoch': 1} +{'type': 'loss', 'content': 0.01655939407646656, 'timestamp': '2025-09-10 02:37:47.513302', 'step': 1034, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:37:47.577064', 'step': 1034, 'epoch': 1} +{'type': 'loss', 'content': 0.005556761287152767, 'timestamp': '2025-09-10 02:37:47.580405', 'step': 1035, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:37:47.636782', 'step': 1035, 'epoch': 1} +{'type': 'loss', 'content': 0.016216743737459183, 'timestamp': '2025-09-10 02:37:47.645712', 'step': 1036, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:37:47.700483', 'step': 1036, 'epoch': 1} +{'type': 'loss', 'content': 0.034411948174238205, 'timestamp': '2025-09-10 02:37:47.711033', 'step': 1037, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:37:47.763991', 'step': 1037, 'epoch': 1} +{'type': 'loss', 'content': 0.006037055980414152, 'timestamp': '2025-09-10 02:37:47.766135', 'step': 1038, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:37:47.818945', 'step': 1038, 'epoch': 1} +{'type': 'loss', 'content': 0.005499404389411211, 'timestamp': '2025-09-10 02:37:47.820973', 'step': 1039, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:37:47.874955', 'step': 1039, 'epoch': 1} +{'type': 'loss', 'content': 0.022077979519963264, 'timestamp': '2025-09-10 02:37:47.885358', 'step': 1040, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-09-10 02:37:47.945031', 'step': 1040, 'epoch': 1} +{'type': 'loss', 'content': 0.008110353723168373, 'timestamp': '2025-09-10 02:37:47.957041', 'step': 1041, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:37:48.019306', 'step': 1041, 'epoch': 1} +{'type': 'loss', 'content': 0.024108950048685074, 'timestamp': '2025-09-10 02:37:48.025951', 'step': 1042, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:37:48.078876', 'step': 1042, 'epoch': 1} +{'type': 'loss', 'content': 0.027277912944555283, 'timestamp': '2025-09-10 02:37:48.080847', 'step': 1043, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:37:48.133481', 'step': 1043, 'epoch': 1} +{'type': 'loss', 'content': 0.009014950133860111, 'timestamp': '2025-09-10 02:37:48.139031', 'step': 1044, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:37:48.190974', 'step': 1044, 'epoch': 1} +{'type': 'loss', 'content': 0.009914405643939972, 'timestamp': '2025-09-10 02:37:48.201216', 'step': 1045, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-09-10 02:37:48.269417', 'step': 1045, 'epoch': 1} +{'type': 'loss', 'content': 0.006300345994532108, 'timestamp': '2025-09-10 02:37:48.282143', 'step': 1046, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:37:48.334989', 'step': 1046, 'epoch': 1} +{'type': 'loss', 'content': 0.00424076896160841, 'timestamp': '2025-09-10 02:37:48.337099', 'step': 1047, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:37:48.389580', 'step': 1047, 'epoch': 1} +{'type': 'loss', 'content': 0.034432608634233475, 'timestamp': '2025-09-10 02:37:48.395326', 'step': 1048, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:37:48.446945', 'step': 1048, 'epoch': 1} +{'type': 'loss', 'content': 0.015104919672012329, 'timestamp': '2025-09-10 02:37:48.457232', 'step': 1049, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:37:48.511389', 'step': 1049, 'epoch': 1} +{'type': 'loss', 'content': 0.025710944086313248, 'timestamp': '2025-09-10 02:37:48.513353', 'step': 1050, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:37:48.565779', 'step': 1050, 'epoch': 1} +{'type': 'loss', 'content': 0.021173741668462753, 'timestamp': '2025-09-10 02:37:48.572349', 'step': 1051, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:37:48.630570', 'step': 1051, 'epoch': 1} +{'type': 'loss', 'content': 0.013069218955934048, 'timestamp': '2025-09-10 02:37:48.641751', 'step': 1052, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:37:48.694417', 'step': 1052, 'epoch': 1} +{'type': 'loss', 'content': 0.01654890365898609, 'timestamp': '2025-09-10 02:37:48.696395', 'step': 1053, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:37:48.753911', 'step': 1053, 'epoch': 1} +{'type': 'loss', 'content': 0.01027678046375513, 'timestamp': '2025-09-10 02:37:48.764329', 'step': 1054, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:37:48.816780', 'step': 1054, 'epoch': 1} +{'type': 'loss', 'content': 0.019616004079580307, 'timestamp': '2025-09-10 02:37:48.823174', 'step': 1055, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:37:48.875669', 'step': 1055, 'epoch': 1} +{'type': 'loss', 'content': 0.01368601806461811, 'timestamp': '2025-09-10 02:37:48.881479', 'step': 1056, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:37:48.932942', 'step': 1056, 'epoch': 1} +{'type': 'loss', 'content': 0.022524571046233177, 'timestamp': '2025-09-10 02:37:48.935988', 'step': 1057, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:37:48.988151', 'step': 1057, 'epoch': 1} +{'type': 'loss', 'content': 0.005104460753500462, 'timestamp': '2025-09-10 02:37:48.991258', 'step': 1058, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:37:49.046483', 'step': 1058, 'epoch': 1} +{'type': 'loss', 'content': 0.01909588649868965, 'timestamp': '2025-09-10 02:37:49.049648', 'step': 1059, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:37:49.101786', 'step': 1059, 'epoch': 1} +{'type': 'loss', 'content': 0.008402914740145206, 'timestamp': '2025-09-10 02:37:49.107288', 'step': 1060, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:37:49.158871', 'step': 1060, 'epoch': 1} +{'type': 'loss', 'content': 0.0161873959004879, 'timestamp': '2025-09-10 02:37:49.160959', 'step': 1061, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:37:49.220713', 'step': 1061, 'epoch': 1} +{'type': 'loss', 'content': 0.007617958355695009, 'timestamp': '2025-09-10 02:37:49.231419', 'step': 1062, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:37:49.284675', 'step': 1062, 'epoch': 1} +{'type': 'loss', 'content': 0.02958431839942932, 'timestamp': '2025-09-10 02:37:49.291383', 'step': 1063, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 480], 'flops': 9600058345344.0}, 'timestamp': '2025-09-10 02:37:49.364424', 'step': 1063, 'epoch': 1} +{'type': 'loss', 'content': 0.022033551707863808, 'timestamp': '2025-09-10 02:37:49.378901', 'step': 1064, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:37:49.431331', 'step': 1064, 'epoch': 1} +{'type': 'loss', 'content': 0.02828170545399189, 'timestamp': '2025-09-10 02:37:49.433325', 'step': 1065, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:37:49.486112', 'step': 1065, 'epoch': 1} +{'type': 'loss', 'content': 0.01017991453409195, 'timestamp': '2025-09-10 02:37:49.488063', 'step': 1066, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:37:49.540450', 'step': 1066, 'epoch': 1} +{'type': 'loss', 'content': 0.017581427469849586, 'timestamp': '2025-09-10 02:37:49.542388', 'step': 1067, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:37:49.594943', 'step': 1067, 'epoch': 1} +{'type': 'loss', 'content': 0.006788523402065039, 'timestamp': '2025-09-10 02:37:49.602335', 'step': 1068, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:37:49.654227', 'step': 1068, 'epoch': 1} +{'type': 'loss', 'content': 0.011207656934857368, 'timestamp': '2025-09-10 02:37:49.656280', 'step': 1069, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:37:49.708439', 'step': 1069, 'epoch': 1} +{'type': 'loss', 'content': 0.005644269287586212, 'timestamp': '2025-09-10 02:37:49.710656', 'step': 1070, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:37:49.762784', 'step': 1070, 'epoch': 1} +{'type': 'loss', 'content': 0.03670249506831169, 'timestamp': '2025-09-10 02:37:49.769621', 'step': 1071, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:37:49.821936', 'step': 1071, 'epoch': 1} +{'type': 'loss', 'content': 0.014227320440113544, 'timestamp': '2025-09-10 02:37:49.827555', 'step': 1072, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:37:49.879455', 'step': 1072, 'epoch': 1} +{'type': 'loss', 'content': 0.007771211676299572, 'timestamp': '2025-09-10 02:37:49.882627', 'step': 1073, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:37:49.934660', 'step': 1073, 'epoch': 1} +{'type': 'loss', 'content': 0.008874562568962574, 'timestamp': '2025-09-10 02:37:49.936849', 'step': 1074, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:37:49.989454', 'step': 1074, 'epoch': 1} +{'type': 'loss', 'content': 0.029543662443757057, 'timestamp': '2025-09-10 02:37:49.991593', 'step': 1075, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:37:50.043818', 'step': 1075, 'epoch': 1} +{'type': 'loss', 'content': 0.004253097344189882, 'timestamp': '2025-09-10 02:37:50.049428', 'step': 1076, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:37:50.101070', 'step': 1076, 'epoch': 1} +{'type': 'loss', 'content': 0.005809030961245298, 'timestamp': '2025-09-10 02:37:50.102905', 'step': 1077, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:37:50.154900', 'step': 1077, 'epoch': 1} +{'type': 'loss', 'content': 0.010655926540493965, 'timestamp': '2025-09-10 02:37:50.157170', 'step': 1078, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:37:50.209772', 'step': 1078, 'epoch': 1} +{'type': 'loss', 'content': 0.009624289348721504, 'timestamp': '2025-09-10 02:37:50.212717', 'step': 1079, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:37:50.266677', 'step': 1079, 'epoch': 1} +{'type': 'loss', 'content': 0.006700613535940647, 'timestamp': '2025-09-10 02:37:50.272389', 'step': 1080, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:37:50.326123', 'step': 1080, 'epoch': 1} +{'type': 'loss', 'content': 0.005288519896566868, 'timestamp': '2025-09-10 02:37:50.329455', 'step': 1081, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-09-10 02:37:50.390635', 'step': 1081, 'epoch': 1} +{'type': 'loss', 'content': 0.0022787742782384157, 'timestamp': '2025-09-10 02:37:50.401563', 'step': 1082, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:37:50.462272', 'step': 1082, 'epoch': 1} +{'type': 'loss', 'content': 0.019391821697354317, 'timestamp': '2025-09-10 02:37:50.465190', 'step': 1083, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:37:50.519309', 'step': 1083, 'epoch': 1} +{'type': 'loss', 'content': 0.010357503779232502, 'timestamp': '2025-09-10 02:37:50.525094', 'step': 1084, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:37:50.577254', 'step': 1084, 'epoch': 1} +{'type': 'loss', 'content': 0.02498718723654747, 'timestamp': '2025-09-10 02:37:50.585520', 'step': 1085, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:37:50.637985', 'step': 1085, 'epoch': 1} +{'type': 'loss', 'content': 0.018656384199857712, 'timestamp': '2025-09-10 02:37:50.639966', 'step': 1086, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:37:50.692361', 'step': 1086, 'epoch': 1} +{'type': 'loss', 'content': 0.009002963081002235, 'timestamp': '2025-09-10 02:37:50.700435', 'step': 1087, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:37:50.753069', 'step': 1087, 'epoch': 1} +{'type': 'loss', 'content': 0.015503662638366222, 'timestamp': '2025-09-10 02:37:50.760444', 'step': 1088, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:37:50.817035', 'step': 1088, 'epoch': 1} +{'type': 'loss', 'content': 0.015446570701897144, 'timestamp': '2025-09-10 02:37:50.818930', 'step': 1089, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:37:50.871366', 'step': 1089, 'epoch': 1} +{'type': 'loss', 'content': 0.009854676201939583, 'timestamp': '2025-09-10 02:37:50.879581', 'step': 1090, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:37:50.932492', 'step': 1090, 'epoch': 1} +{'type': 'loss', 'content': 0.00652840081602335, 'timestamp': '2025-09-10 02:37:50.934549', 'step': 1091, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:37:50.987250', 'step': 1091, 'epoch': 1} +{'type': 'loss', 'content': 0.008367020636796951, 'timestamp': '2025-09-10 02:37:50.996328', 'step': 1092, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:37:51.051122', 'step': 1092, 'epoch': 1} +{'type': 'loss', 'content': 0.006936206948012114, 'timestamp': '2025-09-10 02:37:51.061375', 'step': 1093, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:37:51.114312', 'step': 1093, 'epoch': 1} +{'type': 'loss', 'content': 0.02077474072575569, 'timestamp': '2025-09-10 02:37:51.116238', 'step': 1094, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:37:51.168891', 'step': 1094, 'epoch': 1} +{'type': 'loss', 'content': 0.01412847638130188, 'timestamp': '2025-09-10 02:37:51.175461', 'step': 1095, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:37:51.228332', 'step': 1095, 'epoch': 1} +{'type': 'loss', 'content': 0.014179607853293419, 'timestamp': '2025-09-10 02:37:51.234178', 'step': 1096, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:37:51.286239', 'step': 1096, 'epoch': 1} +{'type': 'loss', 'content': 0.0063507407903671265, 'timestamp': '2025-09-10 02:37:51.288454', 'step': 1097, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:37:51.342843', 'step': 1097, 'epoch': 1} +{'type': 'loss', 'content': 0.007183849345892668, 'timestamp': '2025-09-10 02:37:51.352606', 'step': 1098, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-09-10 02:37:51.419043', 'step': 1098, 'epoch': 1} +{'type': 'loss', 'content': 0.010225744917988777, 'timestamp': '2025-09-10 02:37:51.431290', 'step': 1099, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:37:51.484416', 'step': 1099, 'epoch': 1} +{'type': 'loss', 'content': 0.0024694055318832397, 'timestamp': '2025-09-10 02:37:51.490117', 'step': 1100, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:37:51.542036', 'step': 1100, 'epoch': 1} +{'type': 'loss', 'content': 0.008561083115637302, 'timestamp': '2025-09-10 02:37:51.545283', 'step': 1101, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:37:51.597857', 'step': 1101, 'epoch': 1} +{'type': 'loss', 'content': 0.04253784194588661, 'timestamp': '2025-09-10 02:37:51.599694', 'step': 1102, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:37:51.656904', 'step': 1102, 'epoch': 1} +{'type': 'loss', 'content': 0.004142506048083305, 'timestamp': '2025-09-10 02:37:51.667321', 'step': 1103, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:37:51.719576', 'step': 1103, 'epoch': 1} +{'type': 'loss', 'content': 0.019315490499138832, 'timestamp': '2025-09-10 02:37:51.727041', 'step': 1104, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:37:51.779109', 'step': 1104, 'epoch': 1} +{'type': 'loss', 'content': 0.0027652818243950605, 'timestamp': '2025-09-10 02:37:51.781288', 'step': 1105, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:37:51.833789', 'step': 1105, 'epoch': 1} +{'type': 'loss', 'content': 0.008513693697750568, 'timestamp': '2025-09-10 02:37:51.835775', 'step': 1106, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:37:51.895316', 'step': 1106, 'epoch': 1} +{'type': 'loss', 'content': 0.022946733981370926, 'timestamp': '2025-09-10 02:37:51.906008', 'step': 1107, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:37:51.958952', 'step': 1107, 'epoch': 1} +{'type': 'loss', 'content': 0.003004850819706917, 'timestamp': '2025-09-10 02:37:51.969283', 'step': 1108, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:37:52.020963', 'step': 1108, 'epoch': 1} +{'type': 'loss', 'content': 0.02548404224216938, 'timestamp': '2025-09-10 02:37:52.022825', 'step': 1109, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:37:52.075308', 'step': 1109, 'epoch': 1} +{'type': 'loss', 'content': 0.024745497852563858, 'timestamp': '2025-09-10 02:37:52.078141', 'step': 1110, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:37:52.130697', 'step': 1110, 'epoch': 1} +{'type': 'loss', 'content': 0.003216462442651391, 'timestamp': '2025-09-10 02:37:52.132455', 'step': 1111, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:37:52.185526', 'step': 1111, 'epoch': 1} +{'type': 'loss', 'content': 0.011288267560303211, 'timestamp': '2025-09-10 02:37:52.191281', 'step': 1112, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:37:52.243628', 'step': 1112, 'epoch': 1} +{'type': 'loss', 'content': 0.003838461125269532, 'timestamp': '2025-09-10 02:37:52.245889', 'step': 1113, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:37:52.298293', 'step': 1113, 'epoch': 1} +{'type': 'loss', 'content': 0.007491410709917545, 'timestamp': '2025-09-10 02:37:52.300279', 'step': 1114, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:37:52.352778', 'step': 1114, 'epoch': 1} +{'type': 'loss', 'content': 0.025626661255955696, 'timestamp': '2025-09-10 02:37:52.354774', 'step': 1115, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:37:52.407351', 'step': 1115, 'epoch': 1} +{'type': 'loss', 'content': 0.005912961903959513, 'timestamp': '2025-09-10 02:37:52.412799', 'step': 1116, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:37:52.465936', 'step': 1116, 'epoch': 1} +{'type': 'loss', 'content': 0.009867900051176548, 'timestamp': '2025-09-10 02:37:52.467780', 'step': 1117, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:37:52.519824', 'step': 1117, 'epoch': 1} +{'type': 'loss', 'content': 0.039868734776973724, 'timestamp': '2025-09-10 02:37:52.521873', 'step': 1118, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:37:52.573781', 'step': 1118, 'epoch': 1} +{'type': 'loss', 'content': 0.013871312141418457, 'timestamp': '2025-09-10 02:37:52.575773', 'step': 1119, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:37:52.627678', 'step': 1119, 'epoch': 1} +{'type': 'loss', 'content': 0.017347069457173347, 'timestamp': '2025-09-10 02:37:52.633198', 'step': 1120, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:37:52.684659', 'step': 1120, 'epoch': 1} +{'type': 'loss', 'content': 0.002342894207686186, 'timestamp': '2025-09-10 02:37:52.686663', 'step': 1121, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:37:52.739100', 'step': 1121, 'epoch': 1} +{'type': 'loss', 'content': 0.007890790700912476, 'timestamp': '2025-09-10 02:37:52.741356', 'step': 1122, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:37:52.793966', 'step': 1122, 'epoch': 1} +{'type': 'loss', 'content': 0.009587155655026436, 'timestamp': '2025-09-10 02:37:52.796944', 'step': 1123, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:37:52.849597', 'step': 1123, 'epoch': 1} +{'type': 'loss', 'content': 0.0037315296940505505, 'timestamp': '2025-09-10 02:37:52.854992', 'step': 1124, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:37:52.911968', 'step': 1124, 'epoch': 1} +{'type': 'loss', 'content': 0.019574126228690147, 'timestamp': '2025-09-10 02:37:52.923159', 'step': 1125, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:37:52.976586', 'step': 1125, 'epoch': 1} +{'type': 'loss', 'content': 0.017041923478245735, 'timestamp': '2025-09-10 02:37:52.978758', 'step': 1126, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:37:53.031337', 'step': 1126, 'epoch': 1} +{'type': 'loss', 'content': 0.02936060167849064, 'timestamp': '2025-09-10 02:37:53.033775', 'step': 1127, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:37:53.086188', 'step': 1127, 'epoch': 1} +{'type': 'loss', 'content': 0.0074751111678779125, 'timestamp': '2025-09-10 02:37:53.091927', 'step': 1128, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:37:53.148010', 'step': 1128, 'epoch': 1} +{'type': 'loss', 'content': 0.0022677951492369175, 'timestamp': '2025-09-10 02:37:53.159211', 'step': 1129, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:37:53.212112', 'step': 1129, 'epoch': 1} +{'type': 'loss', 'content': 0.0010953686432912946, 'timestamp': '2025-09-10 02:37:53.213824', 'step': 1130, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:37:53.265732', 'step': 1130, 'epoch': 1} +{'type': 'loss', 'content': 0.006479484494775534, 'timestamp': '2025-09-10 02:37:53.272168', 'step': 1131, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:37:53.324691', 'step': 1131, 'epoch': 1} +{'type': 'loss', 'content': 0.005161761771887541, 'timestamp': '2025-09-10 02:37:53.330197', 'step': 1132, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:37:53.382178', 'step': 1132, 'epoch': 1} +{'type': 'loss', 'content': 0.014082864858210087, 'timestamp': '2025-09-10 02:37:53.384157', 'step': 1133, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:37:53.436458', 'step': 1133, 'epoch': 1} +{'type': 'loss', 'content': 0.005313499365001917, 'timestamp': '2025-09-10 02:37:53.438540', 'step': 1134, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:37:53.491145', 'step': 1134, 'epoch': 1} +{'type': 'loss', 'content': 0.019438592717051506, 'timestamp': '2025-09-10 02:37:53.493156', 'step': 1135, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:37:53.545747', 'step': 1135, 'epoch': 1} +{'type': 'loss', 'content': 0.0026171307545155287, 'timestamp': '2025-09-10 02:37:53.551366', 'step': 1136, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-09-10 02:37:53.617146', 'step': 1136, 'epoch': 1} +{'type': 'loss', 'content': 0.014032737351953983, 'timestamp': '2025-09-10 02:37:53.628402', 'step': 1137, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:37:53.681665', 'step': 1137, 'epoch': 1} +{'type': 'loss', 'content': 0.010052693076431751, 'timestamp': '2025-09-10 02:37:53.683698', 'step': 1138, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:37:53.742899', 'step': 1138, 'epoch': 1} +{'type': 'loss', 'content': 0.001705739414319396, 'timestamp': '2025-09-10 02:37:53.753723', 'step': 1139, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:37:53.806088', 'step': 1139, 'epoch': 1} +{'type': 'loss', 'content': 0.018248632550239563, 'timestamp': '2025-09-10 02:37:53.811508', 'step': 1140, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:37:53.863735', 'step': 1140, 'epoch': 1} +{'type': 'loss', 'content': 0.002061953069642186, 'timestamp': '2025-09-10 02:37:53.866687', 'step': 1141, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:37:53.919446', 'step': 1141, 'epoch': 1} +{'type': 'loss', 'content': 0.005538515746593475, 'timestamp': '2025-09-10 02:37:53.927691', 'step': 1142, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:37:53.980148', 'step': 1142, 'epoch': 1} +{'type': 'loss', 'content': 0.0036665797233581543, 'timestamp': '2025-09-10 02:37:53.982842', 'step': 1143, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:37:54.034929', 'step': 1143, 'epoch': 1} +{'type': 'loss', 'content': 0.008468905463814735, 'timestamp': '2025-09-10 02:37:54.040485', 'step': 1144, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:37:54.092161', 'step': 1144, 'epoch': 1} +{'type': 'loss', 'content': 0.009219801053404808, 'timestamp': '2025-09-10 02:37:54.094586', 'step': 1145, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:37:54.146885', 'step': 1145, 'epoch': 1} +{'type': 'loss', 'content': 0.0010887905955314636, 'timestamp': '2025-09-10 02:37:54.149390', 'step': 1146, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:37:54.201551', 'step': 1146, 'epoch': 1} +{'type': 'loss', 'content': 0.0035215781535953283, 'timestamp': '2025-09-10 02:37:54.203665', 'step': 1147, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:37:54.256210', 'step': 1147, 'epoch': 1} +{'type': 'loss', 'content': 0.010627214796841145, 'timestamp': '2025-09-10 02:37:54.261895', 'step': 1148, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:37:54.313808', 'step': 1148, 'epoch': 1} +{'type': 'loss', 'content': 0.033319178968667984, 'timestamp': '2025-09-10 02:37:54.316698', 'step': 1149, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:37:54.369947', 'step': 1149, 'epoch': 1} +{'type': 'loss', 'content': 0.001132026081904769, 'timestamp': '2025-09-10 02:37:54.373187', 'step': 1150, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:37:54.426003', 'step': 1150, 'epoch': 1} +{'type': 'loss', 'content': 0.013359389267861843, 'timestamp': '2025-09-10 02:37:54.427894', 'step': 1151, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:37:54.480914', 'step': 1151, 'epoch': 1} +{'type': 'loss', 'content': 0.013103527016937733, 'timestamp': '2025-09-10 02:37:54.491249', 'step': 1152, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:37:54.543087', 'step': 1152, 'epoch': 1} +{'type': 'loss', 'content': 0.00590308103710413, 'timestamp': '2025-09-10 02:37:54.544869', 'step': 1153, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:37:54.597236', 'step': 1153, 'epoch': 1} +{'type': 'loss', 'content': 0.006476046051830053, 'timestamp': '2025-09-10 02:37:54.599275', 'step': 1154, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:37:54.651975', 'step': 1154, 'epoch': 1} +{'type': 'loss', 'content': 0.00043728688615374267, 'timestamp': '2025-09-10 02:37:54.658610', 'step': 1155, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:37:54.711023', 'step': 1155, 'epoch': 1} +{'type': 'loss', 'content': 0.024524131789803505, 'timestamp': '2025-09-10 02:37:54.717060', 'step': 1156, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:37:54.769756', 'step': 1156, 'epoch': 1} +{'type': 'loss', 'content': 0.03014794923365116, 'timestamp': '2025-09-10 02:37:54.772194', 'step': 1157, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:37:54.825546', 'step': 1157, 'epoch': 1} +{'type': 'loss', 'content': 0.007397075649350882, 'timestamp': '2025-09-10 02:37:54.827584', 'step': 1158, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:37:54.880182', 'step': 1158, 'epoch': 1} +{'type': 'loss', 'content': 0.019256308674812317, 'timestamp': '2025-09-10 02:37:54.882531', 'step': 1159, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:37:54.935246', 'step': 1159, 'epoch': 1} +{'type': 'loss', 'content': 0.003273534355685115, 'timestamp': '2025-09-10 02:37:54.940675', 'step': 1160, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:37:54.993288', 'step': 1160, 'epoch': 1} +{'type': 'loss', 'content': 0.006755192764103413, 'timestamp': '2025-09-10 02:37:54.999645', 'step': 1161, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:37:55.052201', 'step': 1161, 'epoch': 1} +{'type': 'loss', 'content': 0.003680414753034711, 'timestamp': '2025-09-10 02:37:55.055821', 'step': 1162, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:37:55.111138', 'step': 1162, 'epoch': 1} +{'type': 'loss', 'content': 0.012628121301531792, 'timestamp': '2025-09-10 02:37:55.114127', 'step': 1163, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:37:55.167633', 'step': 1163, 'epoch': 1} +{'type': 'loss', 'content': 0.012532063759863377, 'timestamp': '2025-09-10 02:37:55.174039', 'step': 1164, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:37:55.230030', 'step': 1164, 'epoch': 1} +{'type': 'loss', 'content': 0.027490537613630295, 'timestamp': '2025-09-10 02:37:55.233888', 'step': 1165, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:37:55.288580', 'step': 1165, 'epoch': 1} +{'type': 'loss', 'content': 0.0025479462929069996, 'timestamp': '2025-09-10 02:37:55.294543', 'step': 1166, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:37:55.349019', 'step': 1166, 'epoch': 1} +{'type': 'loss', 'content': 0.005940371658653021, 'timestamp': '2025-09-10 02:37:55.354346', 'step': 1167, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:37:55.410499', 'step': 1167, 'epoch': 1} +{'type': 'loss', 'content': 0.01090217474848032, 'timestamp': '2025-09-10 02:37:55.417464', 'step': 1168, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-09-10 02:37:55.486228', 'step': 1168, 'epoch': 1} +{'type': 'loss', 'content': 0.05105715990066528, 'timestamp': '2025-09-10 02:37:55.499867', 'step': 1169, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:37:55.563571', 'step': 1169, 'epoch': 1} +{'type': 'loss', 'content': 0.04473403841257095, 'timestamp': '2025-09-10 02:37:55.566427', 'step': 1170, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:37:55.633662', 'step': 1170, 'epoch': 1} +{'type': 'loss', 'content': 0.0012067670468240976, 'timestamp': '2025-09-10 02:37:55.636196', 'step': 1171, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:37:55.690215', 'step': 1171, 'epoch': 1} +{'type': 'loss', 'content': 0.002647282090038061, 'timestamp': '2025-09-10 02:37:55.700623', 'step': 1172, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:37:55.754154', 'step': 1172, 'epoch': 1} +{'type': 'loss', 'content': 0.006060405168682337, 'timestamp': '2025-09-10 02:37:55.762251', 'step': 1173, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:37:55.816354', 'step': 1173, 'epoch': 1} +{'type': 'loss', 'content': 0.010575481690466404, 'timestamp': '2025-09-10 02:37:55.818535', 'step': 1174, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-09-10 02:37:55.910600', 'step': 1174, 'epoch': 1} +{'type': 'loss', 'content': 0.04444717988371849, 'timestamp': '2025-09-10 02:37:55.923274', 'step': 1175, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:37:55.977409', 'step': 1175, 'epoch': 1} +{'type': 'loss', 'content': 0.0008260281756520271, 'timestamp': '2025-09-10 02:37:55.986267', 'step': 1176, 'epoch': 1} +{'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 736], 'batch_size': 8, 'flops': 14691612894976}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10859018244352}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 624], 'batch_size': 8, 'flops': 12455932682112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11497784019456}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8623338031488}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8623338031488}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10859018244352}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 656], 'batch_size': 8, 'flops': 13094698457216}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 560], 'batch_size': 8, 'flops': 11178401131904}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11497784019456}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 1168], 'batch_size': 8, 'flops': 23314950858880}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 640], 'batch_size': 8, 'flops': 12775315569664}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [6, 208], 'batch_size': 8, 'flops': 4151977605760}], 'timestamp': '2025-09-10 02:38:12.898862', 'step': 1176, 'epoch': 1} +{'type': 'pplx', 'content': 27345006.897273418, 'timestamp': '2025-09-10 02:38:12.901641', 'step': 1176, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:38:12.958740', 'step': 1176, 'epoch': 1} +{'type': 'loss', 'content': 0.022977551445364952, 'timestamp': '2025-09-10 02:38:12.966266', 'step': 1177, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:38:13.020467', 'step': 1177, 'epoch': 1} +{'type': 'loss', 'content': 0.029089193791151047, 'timestamp': '2025-09-10 02:38:13.022248', 'step': 1178, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:38:13.076233', 'step': 1178, 'epoch': 1} +{'type': 'loss', 'content': 0.003365251934155822, 'timestamp': '2025-09-10 02:38:13.078324', 'step': 1179, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:38:13.131194', 'step': 1179, 'epoch': 1} +{'type': 'loss', 'content': 0.026831891387701035, 'timestamp': '2025-09-10 02:38:13.137427', 'step': 1180, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:38:13.189921', 'step': 1180, 'epoch': 1} +{'type': 'loss', 'content': 0.015423259697854519, 'timestamp': '2025-09-10 02:38:13.191795', 'step': 1181, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 512], 'flops': 10240062230528.0}, 'timestamp': '2025-09-10 02:38:13.267949', 'step': 1181, 'epoch': 1} +{'type': 'loss', 'content': 0.002096066251397133, 'timestamp': '2025-09-10 02:38:13.282038', 'step': 1182, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:38:13.336140', 'step': 1182, 'epoch': 1} +{'type': 'loss', 'content': 0.015754317864775658, 'timestamp': '2025-09-10 02:38:13.344080', 'step': 1183, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:38:13.397561', 'step': 1183, 'epoch': 1} +{'type': 'loss', 'content': 0.0015605379594489932, 'timestamp': '2025-09-10 02:38:13.403909', 'step': 1184, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:38:13.457070', 'step': 1184, 'epoch': 1} +{'type': 'loss', 'content': 0.04798185080289841, 'timestamp': '2025-09-10 02:38:13.459423', 'step': 1185, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:38:13.512142', 'step': 1185, 'epoch': 1} +{'type': 'loss', 'content': 0.027037236839532852, 'timestamp': '2025-09-10 02:38:13.515428', 'step': 1186, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:38:13.569324', 'step': 1186, 'epoch': 1} +{'type': 'loss', 'content': 0.00343741150572896, 'timestamp': '2025-09-10 02:38:13.576677', 'step': 1187, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:38:13.630314', 'step': 1187, 'epoch': 1} +{'type': 'loss', 'content': 0.009087854065001011, 'timestamp': '2025-09-10 02:38:13.636269', 'step': 1188, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:38:13.688489', 'step': 1188, 'epoch': 1} +{'type': 'loss', 'content': 0.019215011969208717, 'timestamp': '2025-09-10 02:38:13.690447', 'step': 1189, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:38:13.743298', 'step': 1189, 'epoch': 1} +{'type': 'loss', 'content': 0.033194538205862045, 'timestamp': '2025-09-10 02:38:13.745396', 'step': 1190, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:38:13.799027', 'step': 1190, 'epoch': 1} +{'type': 'loss', 'content': 0.019573742523789406, 'timestamp': '2025-09-10 02:38:13.808637', 'step': 1191, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:38:13.861110', 'step': 1191, 'epoch': 1} +{'type': 'loss', 'content': 0.032782748341560364, 'timestamp': '2025-09-10 02:38:13.866709', 'step': 1192, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:38:13.923187', 'step': 1192, 'epoch': 1} +{'type': 'loss', 'content': 0.006269785109907389, 'timestamp': '2025-09-10 02:38:13.934428', 'step': 1193, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:38:13.987892', 'step': 1193, 'epoch': 1} +{'type': 'loss', 'content': 0.01658725179731846, 'timestamp': '2025-09-10 02:38:13.990860', 'step': 1194, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 624], 'flops': 12480075828672.0}, 'timestamp': '2025-09-10 02:38:14.083622', 'step': 1194, 'epoch': 1} +{'type': 'loss', 'content': 0.017288723960518837, 'timestamp': '2025-09-10 02:38:14.100964', 'step': 1195, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:38:14.154133', 'step': 1195, 'epoch': 1} +{'type': 'loss', 'content': 0.010024419054389, 'timestamp': '2025-09-10 02:38:14.162753', 'step': 1196, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:38:14.215042', 'step': 1196, 'epoch': 1} +{'type': 'loss', 'content': 0.007814670912921429, 'timestamp': '2025-09-10 02:38:14.217997', 'step': 1197, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-09-10 02:38:14.284892', 'step': 1197, 'epoch': 1} +{'type': 'loss', 'content': 0.012869198806583881, 'timestamp': '2025-09-10 02:38:14.297123', 'step': 1198, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:38:14.350960', 'step': 1198, 'epoch': 1} +{'type': 'loss', 'content': 0.0037422089371830225, 'timestamp': '2025-09-10 02:38:14.353152', 'step': 1199, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:38:14.406833', 'step': 1199, 'epoch': 1} +{'type': 'loss', 'content': 0.030634628608822823, 'timestamp': '2025-09-10 02:38:14.412835', 'step': 1200, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:38:14.469790', 'step': 1200, 'epoch': 1} +{'type': 'loss', 'content': 0.012157849036157131, 'timestamp': '2025-09-10 02:38:14.481032', 'step': 1201, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 464], 'flops': 9280056402752.0}, 'timestamp': '2025-09-10 02:38:14.553745', 'step': 1201, 'epoch': 1} +{'type': 'loss', 'content': 0.041633132845163345, 'timestamp': '2025-09-10 02:38:14.567215', 'step': 1202, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-09-10 02:38:14.629164', 'step': 1202, 'epoch': 1} +{'type': 'loss', 'content': 0.0014732087729498744, 'timestamp': '2025-09-10 02:38:14.640263', 'step': 1203, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:38:14.693337', 'step': 1203, 'epoch': 1} +{'type': 'loss', 'content': 0.01730882003903389, 'timestamp': '2025-09-10 02:38:14.699167', 'step': 1204, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:38:14.751287', 'step': 1204, 'epoch': 1} +{'type': 'loss', 'content': 0.0406925305724144, 'timestamp': '2025-09-10 02:38:14.761452', 'step': 1205, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:38:14.814922', 'step': 1205, 'epoch': 1} +{'type': 'loss', 'content': 0.014714895747601986, 'timestamp': '2025-09-10 02:38:14.822949', 'step': 1206, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:38:14.875989', 'step': 1206, 'epoch': 1} +{'type': 'loss', 'content': 0.017625965178012848, 'timestamp': '2025-09-10 02:38:14.884211', 'step': 1207, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:38:14.937946', 'step': 1207, 'epoch': 1} +{'type': 'loss', 'content': 0.04992163926362991, 'timestamp': '2025-09-10 02:38:14.948374', 'step': 1208, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:38:15.000625', 'step': 1208, 'epoch': 1} +{'type': 'loss', 'content': 0.007176562212407589, 'timestamp': '2025-09-10 02:38:15.002555', 'step': 1209, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:38:15.055144', 'step': 1209, 'epoch': 1} +{'type': 'loss', 'content': 0.027468519285321236, 'timestamp': '2025-09-10 02:38:15.057325', 'step': 1210, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:38:15.110240', 'step': 1210, 'epoch': 1} +{'type': 'loss', 'content': 0.01314002275466919, 'timestamp': '2025-09-10 02:38:15.116878', 'step': 1211, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:38:15.170150', 'step': 1211, 'epoch': 1} +{'type': 'loss', 'content': 0.017846744507551193, 'timestamp': '2025-09-10 02:38:15.176310', 'step': 1212, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:38:15.232007', 'step': 1212, 'epoch': 1} +{'type': 'loss', 'content': 0.009627328254282475, 'timestamp': '2025-09-10 02:38:15.234229', 'step': 1213, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:38:15.287080', 'step': 1213, 'epoch': 1} +{'type': 'loss', 'content': 0.018968701362609863, 'timestamp': '2025-09-10 02:38:15.289244', 'step': 1214, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:38:15.342526', 'step': 1214, 'epoch': 1} +{'type': 'loss', 'content': 0.010385209694504738, 'timestamp': '2025-09-10 02:38:15.348911', 'step': 1215, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:38:15.401961', 'step': 1215, 'epoch': 1} +{'type': 'loss', 'content': 0.010980025865137577, 'timestamp': '2025-09-10 02:38:15.409216', 'step': 1216, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:38:15.461660', 'step': 1216, 'epoch': 1} +{'type': 'loss', 'content': 0.02287864498794079, 'timestamp': '2025-09-10 02:38:15.463604', 'step': 1217, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:38:15.516676', 'step': 1217, 'epoch': 1} +{'type': 'loss', 'content': 0.02321491949260235, 'timestamp': '2025-09-10 02:38:15.518873', 'step': 1218, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:38:15.571683', 'step': 1218, 'epoch': 1} +{'type': 'loss', 'content': 0.012190199457108974, 'timestamp': '2025-09-10 02:38:15.573671', 'step': 1219, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:38:15.628782', 'step': 1219, 'epoch': 1} +{'type': 'loss', 'content': 0.008502763696014881, 'timestamp': '2025-09-10 02:38:15.634777', 'step': 1220, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:38:15.687527', 'step': 1220, 'epoch': 1} +{'type': 'loss', 'content': 0.021801332011818886, 'timestamp': '2025-09-10 02:38:15.689709', 'step': 1221, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:38:15.742823', 'step': 1221, 'epoch': 1} +{'type': 'loss', 'content': 0.017310811206698418, 'timestamp': '2025-09-10 02:38:15.744838', 'step': 1222, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:38:15.798401', 'step': 1222, 'epoch': 1} +{'type': 'loss', 'content': 0.021017830818891525, 'timestamp': '2025-09-10 02:38:15.800728', 'step': 1223, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:38:15.854630', 'step': 1223, 'epoch': 1} +{'type': 'loss', 'content': 0.0442025251686573, 'timestamp': '2025-09-10 02:38:15.865029', 'step': 1224, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:38:15.919329', 'step': 1224, 'epoch': 1} +{'type': 'loss', 'content': 0.012025970034301281, 'timestamp': '2025-09-10 02:38:15.921666', 'step': 1225, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:38:15.975498', 'step': 1225, 'epoch': 1} +{'type': 'loss', 'content': 0.020059674978256226, 'timestamp': '2025-09-10 02:38:15.985114', 'step': 1226, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:38:16.038170', 'step': 1226, 'epoch': 1} +{'type': 'loss', 'content': 0.03154158964753151, 'timestamp': '2025-09-10 02:38:16.040409', 'step': 1227, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:38:16.092998', 'step': 1227, 'epoch': 1} +{'type': 'loss', 'content': 0.013741062954068184, 'timestamp': '2025-09-10 02:38:16.098747', 'step': 1228, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:38:16.150938', 'step': 1228, 'epoch': 1} +{'type': 'loss', 'content': 0.021907124668359756, 'timestamp': '2025-09-10 02:38:16.157549', 'step': 1229, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:38:16.210417', 'step': 1229, 'epoch': 1} +{'type': 'loss', 'content': 0.004827319644391537, 'timestamp': '2025-09-10 02:38:16.212438', 'step': 1230, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:38:16.265687', 'step': 1230, 'epoch': 1} +{'type': 'loss', 'content': 0.008723611012101173, 'timestamp': '2025-09-10 02:38:16.267672', 'step': 1231, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:38:16.320927', 'step': 1231, 'epoch': 1} +{'type': 'loss', 'content': 0.027946332469582558, 'timestamp': '2025-09-10 02:38:16.331306', 'step': 1232, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:38:16.386081', 'step': 1232, 'epoch': 1} +{'type': 'loss', 'content': 0.011744270101189613, 'timestamp': '2025-09-10 02:38:16.388189', 'step': 1233, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:38:16.441696', 'step': 1233, 'epoch': 1} +{'type': 'loss', 'content': 0.018970759585499763, 'timestamp': '2025-09-10 02:38:16.443831', 'step': 1234, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:38:16.497105', 'step': 1234, 'epoch': 1} +{'type': 'loss', 'content': 0.03357694670557976, 'timestamp': '2025-09-10 02:38:16.499194', 'step': 1235, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:38:16.552121', 'step': 1235, 'epoch': 1} +{'type': 'loss', 'content': 0.005652002990245819, 'timestamp': '2025-09-10 02:38:16.558251', 'step': 1236, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:38:16.610344', 'step': 1236, 'epoch': 1} +{'type': 'loss', 'content': 0.021448343992233276, 'timestamp': '2025-09-10 02:38:16.616700', 'step': 1237, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:38:16.670336', 'step': 1237, 'epoch': 1} +{'type': 'loss', 'content': 0.011806346476078033, 'timestamp': '2025-09-10 02:38:16.680007', 'step': 1238, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:38:16.733381', 'step': 1238, 'epoch': 1} +{'type': 'loss', 'content': 0.013083149679005146, 'timestamp': '2025-09-10 02:38:16.741571', 'step': 1239, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:38:16.794628', 'step': 1239, 'epoch': 1} +{'type': 'loss', 'content': 0.008364038541913033, 'timestamp': '2025-09-10 02:38:16.800826', 'step': 1240, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:38:16.853238', 'step': 1240, 'epoch': 1} +{'type': 'loss', 'content': 0.01520631369203329, 'timestamp': '2025-09-10 02:38:16.856085', 'step': 1241, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:38:16.908824', 'step': 1241, 'epoch': 1} +{'type': 'loss', 'content': 0.004735896829515696, 'timestamp': '2025-09-10 02:38:16.911035', 'step': 1242, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:38:16.964002', 'step': 1242, 'epoch': 1} +{'type': 'loss', 'content': 0.010918344371020794, 'timestamp': '2025-09-10 02:38:16.966103', 'step': 1243, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:38:17.018589', 'step': 1243, 'epoch': 1} +{'type': 'loss', 'content': 0.004988556262105703, 'timestamp': '2025-09-10 02:38:17.024569', 'step': 1244, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:38:17.077221', 'step': 1244, 'epoch': 1} +{'type': 'loss', 'content': 0.040805794298648834, 'timestamp': '2025-09-10 02:38:17.079275', 'step': 1245, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:38:17.132795', 'step': 1245, 'epoch': 1} +{'type': 'loss', 'content': 0.016436351463198662, 'timestamp': '2025-09-10 02:38:17.134984', 'step': 1246, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:38:17.188793', 'step': 1246, 'epoch': 1} +{'type': 'loss', 'content': 0.022019585594534874, 'timestamp': '2025-09-10 02:38:17.191049', 'step': 1247, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:38:17.244231', 'step': 1247, 'epoch': 1} +{'type': 'loss', 'content': 0.005552917718887329, 'timestamp': '2025-09-10 02:38:17.250126', 'step': 1248, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:38:17.302429', 'step': 1248, 'epoch': 1} +{'type': 'loss', 'content': 0.040717605501413345, 'timestamp': '2025-09-10 02:38:17.304710', 'step': 1249, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-09-10 02:38:17.370782', 'step': 1249, 'epoch': 1} +{'type': 'loss', 'content': 0.019663427025079727, 'timestamp': '2025-09-10 02:38:17.383025', 'step': 1250, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:38:17.435656', 'step': 1250, 'epoch': 1} +{'type': 'loss', 'content': 0.010957092978060246, 'timestamp': '2025-09-10 02:38:17.442277', 'step': 1251, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:38:17.498542', 'step': 1251, 'epoch': 1} +{'type': 'loss', 'content': 0.017680354416370392, 'timestamp': '2025-09-10 02:38:17.504496', 'step': 1252, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:38:17.557135', 'step': 1252, 'epoch': 1} +{'type': 'loss', 'content': 0.012005984783172607, 'timestamp': '2025-09-10 02:38:17.567396', 'step': 1253, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:38:17.620900', 'step': 1253, 'epoch': 1} +{'type': 'loss', 'content': 0.013249626383185387, 'timestamp': '2025-09-10 02:38:17.627212', 'step': 1254, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:38:17.680564', 'step': 1254, 'epoch': 1} +{'type': 'loss', 'content': 0.002336797770112753, 'timestamp': '2025-09-10 02:38:17.682872', 'step': 1255, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:38:17.735397', 'step': 1255, 'epoch': 1} +{'type': 'loss', 'content': 0.023026512935757637, 'timestamp': '2025-09-10 02:38:17.741284', 'step': 1256, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:38:17.794005', 'step': 1256, 'epoch': 1} +{'type': 'loss', 'content': 0.007024406921118498, 'timestamp': '2025-09-10 02:38:17.797072', 'step': 1257, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:38:17.849531', 'step': 1257, 'epoch': 1} +{'type': 'loss', 'content': 0.020878229290246964, 'timestamp': '2025-09-10 02:38:17.851785', 'step': 1258, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:38:17.904261', 'step': 1258, 'epoch': 1} +{'type': 'loss', 'content': 0.03259377181529999, 'timestamp': '2025-09-10 02:38:17.906331', 'step': 1259, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:38:17.959703', 'step': 1259, 'epoch': 1} +{'type': 'loss', 'content': 0.014760008081793785, 'timestamp': '2025-09-10 02:38:17.965495', 'step': 1260, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:38:18.022380', 'step': 1260, 'epoch': 1} +{'type': 'loss', 'content': 0.022210806608200073, 'timestamp': '2025-09-10 02:38:18.033641', 'step': 1261, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-09-10 02:38:18.101455', 'step': 1261, 'epoch': 1} +{'type': 'loss', 'content': 0.01268862932920456, 'timestamp': '2025-09-10 02:38:18.114046', 'step': 1262, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:38:18.167560', 'step': 1262, 'epoch': 1} +{'type': 'loss', 'content': 0.0026451244484633207, 'timestamp': '2025-09-10 02:38:18.175841', 'step': 1263, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:38:18.228933', 'step': 1263, 'epoch': 1} +{'type': 'loss', 'content': 0.01006376277655363, 'timestamp': '2025-09-10 02:38:18.234902', 'step': 1264, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:38:18.287342', 'step': 1264, 'epoch': 1} +{'type': 'loss', 'content': 0.015301442705094814, 'timestamp': '2025-09-10 02:38:18.289703', 'step': 1265, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:38:18.342503', 'step': 1265, 'epoch': 1} +{'type': 'loss', 'content': 0.02748100645840168, 'timestamp': '2025-09-10 02:38:18.350929', 'step': 1266, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:38:18.405414', 'step': 1266, 'epoch': 1} +{'type': 'loss', 'content': 0.03254714235663414, 'timestamp': '2025-09-10 02:38:18.415261', 'step': 1267, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:38:18.467782', 'step': 1267, 'epoch': 1} +{'type': 'loss', 'content': 0.005905542988330126, 'timestamp': '2025-09-10 02:38:18.474988', 'step': 1268, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:38:18.528224', 'step': 1268, 'epoch': 1} +{'type': 'loss', 'content': 0.033585142344236374, 'timestamp': '2025-09-10 02:38:18.538678', 'step': 1269, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:38:18.592672', 'step': 1269, 'epoch': 1} +{'type': 'loss', 'content': 0.012435145676136017, 'timestamp': '2025-09-10 02:38:18.594906', 'step': 1270, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:38:18.647357', 'step': 1270, 'epoch': 1} +{'type': 'loss', 'content': 0.0311743151396513, 'timestamp': '2025-09-10 02:38:18.655566', 'step': 1271, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 512], 'flops': 10240062230528.0}, 'timestamp': '2025-09-10 02:38:18.731154', 'step': 1271, 'epoch': 1} +{'type': 'loss', 'content': 0.006537002976983786, 'timestamp': '2025-09-10 02:38:18.746011', 'step': 1272, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-09-10 02:38:18.811001', 'step': 1272, 'epoch': 1} +{'type': 'loss', 'content': 0.006863919552415609, 'timestamp': '2025-09-10 02:38:18.824219', 'step': 1273, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:38:18.877154', 'step': 1273, 'epoch': 1} +{'type': 'loss', 'content': 0.0139911575242877, 'timestamp': '2025-09-10 02:38:18.879182', 'step': 1274, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:38:18.931952', 'step': 1274, 'epoch': 1} +{'type': 'loss', 'content': 0.0012018707348033786, 'timestamp': '2025-09-10 02:38:18.934151', 'step': 1275, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:38:18.986608', 'step': 1275, 'epoch': 1} +{'type': 'loss', 'content': 0.01508967112749815, 'timestamp': '2025-09-10 02:38:18.992352', 'step': 1276, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:38:19.044139', 'step': 1276, 'epoch': 1} +{'type': 'loss', 'content': 0.010803669691085815, 'timestamp': '2025-09-10 02:38:19.046210', 'step': 1277, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:38:19.098974', 'step': 1277, 'epoch': 1} +{'type': 'loss', 'content': 0.025407282635569572, 'timestamp': '2025-09-10 02:38:19.107108', 'step': 1278, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:38:19.159842', 'step': 1278, 'epoch': 1} +{'type': 'loss', 'content': 0.007086531259119511, 'timestamp': '2025-09-10 02:38:19.162068', 'step': 1279, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:38:19.214911', 'step': 1279, 'epoch': 1} +{'type': 'loss', 'content': 0.01293234247714281, 'timestamp': '2025-09-10 02:38:19.220506', 'step': 1280, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:38:19.275396', 'step': 1280, 'epoch': 1} +{'type': 'loss', 'content': 0.01469672191888094, 'timestamp': '2025-09-10 02:38:19.277522', 'step': 1281, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:38:19.330703', 'step': 1281, 'epoch': 1} +{'type': 'loss', 'content': 0.014103470370173454, 'timestamp': '2025-09-10 02:38:19.337199', 'step': 1282, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-09-10 02:38:19.399760', 'step': 1282, 'epoch': 1} +{'type': 'loss', 'content': 0.02563401870429516, 'timestamp': '2025-09-10 02:38:19.410697', 'step': 1283, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:38:19.464170', 'step': 1283, 'epoch': 1} +{'type': 'loss', 'content': 0.006677473429590464, 'timestamp': '2025-09-10 02:38:19.469997', 'step': 1284, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:38:19.522340', 'step': 1284, 'epoch': 1} +{'type': 'loss', 'content': 0.016647594049572945, 'timestamp': '2025-09-10 02:38:19.524453', 'step': 1285, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:38:19.578283', 'step': 1285, 'epoch': 1} +{'type': 'loss', 'content': 0.0032691562082618475, 'timestamp': '2025-09-10 02:38:19.587921', 'step': 1286, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:38:19.641644', 'step': 1286, 'epoch': 1} +{'type': 'loss', 'content': 0.004560043569654226, 'timestamp': '2025-09-10 02:38:19.651268', 'step': 1287, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:38:19.705837', 'step': 1287, 'epoch': 1} +{'type': 'loss', 'content': 0.015763049945235252, 'timestamp': '2025-09-10 02:38:19.716468', 'step': 1288, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:38:19.773301', 'step': 1288, 'epoch': 1} +{'type': 'loss', 'content': 0.010525521822273731, 'timestamp': '2025-09-10 02:38:19.784470', 'step': 1289, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:38:19.839252', 'step': 1289, 'epoch': 1} +{'type': 'loss', 'content': 0.014197276905179024, 'timestamp': '2025-09-10 02:38:19.841538', 'step': 1290, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:38:19.895219', 'step': 1290, 'epoch': 1} +{'type': 'loss', 'content': 0.002154273446649313, 'timestamp': '2025-09-10 02:38:19.897402', 'step': 1291, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:38:19.950254', 'step': 1291, 'epoch': 1} +{'type': 'loss', 'content': 0.004334344062954187, 'timestamp': '2025-09-10 02:38:19.955868', 'step': 1292, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:38:20.008145', 'step': 1292, 'epoch': 1} +{'type': 'loss', 'content': 0.01705791987478733, 'timestamp': '2025-09-10 02:38:20.014902', 'step': 1293, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:38:20.067808', 'step': 1293, 'epoch': 1} +{'type': 'loss', 'content': 0.0029170287307351828, 'timestamp': '2025-09-10 02:38:20.076005', 'step': 1294, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:38:20.128619', 'step': 1294, 'epoch': 1} +{'type': 'loss', 'content': 0.007720049936324358, 'timestamp': '2025-09-10 02:38:20.130868', 'step': 1295, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:38:20.183277', 'step': 1295, 'epoch': 1} +{'type': 'loss', 'content': 0.043088365346193314, 'timestamp': '2025-09-10 02:38:20.188831', 'step': 1296, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-09-10 02:38:20.248234', 'step': 1296, 'epoch': 1} +{'type': 'loss', 'content': 0.02546166256070137, 'timestamp': '2025-09-10 02:38:20.260041', 'step': 1297, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:38:20.312790', 'step': 1297, 'epoch': 1} +{'type': 'loss', 'content': 0.020500587299466133, 'timestamp': '2025-09-10 02:38:20.321233', 'step': 1298, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:38:20.377055', 'step': 1298, 'epoch': 1} +{'type': 'loss', 'content': 0.013855491764843464, 'timestamp': '2025-09-10 02:38:20.381413', 'step': 1299, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:38:20.441191', 'step': 1299, 'epoch': 1} +{'type': 'loss', 'content': 0.01237348560243845, 'timestamp': '2025-09-10 02:38:20.446852', 'step': 1300, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:38:20.498939', 'step': 1300, 'epoch': 1} +{'type': 'loss', 'content': 0.03956538066267967, 'timestamp': '2025-09-10 02:38:20.509244', 'step': 1301, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 464], 'flops': 9280056402752.0}, 'timestamp': '2025-09-10 02:38:20.581928', 'step': 1301, 'epoch': 1} +{'type': 'loss', 'content': 0.0031188693828880787, 'timestamp': '2025-09-10 02:38:20.595388', 'step': 1302, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:38:20.648410', 'step': 1302, 'epoch': 1} +{'type': 'loss', 'content': 0.01015661470592022, 'timestamp': '2025-09-10 02:38:20.650492', 'step': 1303, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:38:20.709248', 'step': 1303, 'epoch': 1} +{'type': 'loss', 'content': 0.0020863953977823257, 'timestamp': '2025-09-10 02:38:20.715124', 'step': 1304, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-09-10 02:38:20.781036', 'step': 1304, 'epoch': 1} +{'type': 'loss', 'content': 0.008217426016926765, 'timestamp': '2025-09-10 02:38:20.794757', 'step': 1305, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:38:20.853465', 'step': 1305, 'epoch': 1} +{'type': 'loss', 'content': 0.015589231625199318, 'timestamp': '2025-09-10 02:38:20.855437', 'step': 1306, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:38:20.908389', 'step': 1306, 'epoch': 1} +{'type': 'loss', 'content': 0.020488901063799858, 'timestamp': '2025-09-10 02:38:20.914876', 'step': 1307, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:38:20.975170', 'step': 1307, 'epoch': 1} +{'type': 'loss', 'content': 0.024042991921305656, 'timestamp': '2025-09-10 02:38:20.986301', 'step': 1308, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:38:21.038706', 'step': 1308, 'epoch': 1} +{'type': 'loss', 'content': 0.011314533650875092, 'timestamp': '2025-09-10 02:38:21.041683', 'step': 1309, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:38:21.096955', 'step': 1309, 'epoch': 1} +{'type': 'loss', 'content': 0.0035076974891126156, 'timestamp': '2025-09-10 02:38:21.098895', 'step': 1310, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:38:21.151700', 'step': 1310, 'epoch': 1} +{'type': 'loss', 'content': 0.025932367891073227, 'timestamp': '2025-09-10 02:38:21.154654', 'step': 1311, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:38:21.211634', 'step': 1311, 'epoch': 1} +{'type': 'loss', 'content': 0.012541480362415314, 'timestamp': '2025-09-10 02:38:21.218881', 'step': 1312, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:38:21.271265', 'step': 1312, 'epoch': 1} +{'type': 'loss', 'content': 0.006389283575117588, 'timestamp': '2025-09-10 02:38:21.279452', 'step': 1313, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:38:21.333281', 'step': 1313, 'epoch': 1} +{'type': 'loss', 'content': 0.009228929877281189, 'timestamp': '2025-09-10 02:38:21.342924', 'step': 1314, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:38:21.397526', 'step': 1314, 'epoch': 1} +{'type': 'loss', 'content': 0.029494483023881912, 'timestamp': '2025-09-10 02:38:21.399812', 'step': 1315, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:38:21.457729', 'step': 1315, 'epoch': 1} +{'type': 'loss', 'content': 0.007591061759740114, 'timestamp': '2025-09-10 02:38:21.466724', 'step': 1316, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-09-10 02:38:21.531152', 'step': 1316, 'epoch': 1} +{'type': 'loss', 'content': 0.004441217519342899, 'timestamp': '2025-09-10 02:38:21.542981', 'step': 1317, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-09-10 02:38:21.612270', 'step': 1317, 'epoch': 1} +{'type': 'loss', 'content': 0.007398766931146383, 'timestamp': '2025-09-10 02:38:21.624951', 'step': 1318, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:38:21.678636', 'step': 1318, 'epoch': 1} +{'type': 'loss', 'content': 0.026653682813048363, 'timestamp': '2025-09-10 02:38:21.680728', 'step': 1319, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:38:21.734595', 'step': 1319, 'epoch': 1} +{'type': 'loss', 'content': 0.0015224191593006253, 'timestamp': '2025-09-10 02:38:21.745139', 'step': 1320, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:38:21.808181', 'step': 1320, 'epoch': 1} +{'type': 'loss', 'content': 0.0419372096657753, 'timestamp': '2025-09-10 02:38:21.810390', 'step': 1321, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:38:21.863532', 'step': 1321, 'epoch': 1} +{'type': 'loss', 'content': 0.029287930577993393, 'timestamp': '2025-09-10 02:38:21.866645', 'step': 1322, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:38:21.921366', 'step': 1322, 'epoch': 1} +{'type': 'loss', 'content': 0.010279752314090729, 'timestamp': '2025-09-10 02:38:21.931136', 'step': 1323, 'epoch': 1} +{'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 736], 'batch_size': 8, 'flops': 14691612894976}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10859018244352}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 624], 'batch_size': 8, 'flops': 12455932682112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11497784019456}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8623338031488}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8623338031488}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10859018244352}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 656], 'batch_size': 8, 'flops': 13094698457216}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 560], 'batch_size': 8, 'flops': 11178401131904}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11497784019456}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 1168], 'batch_size': 8, 'flops': 23314950858880}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 640], 'batch_size': 8, 'flops': 12775315569664}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [6, 208], 'batch_size': 8, 'flops': 4151977605760}], 'timestamp': '2025-09-10 02:38:38.845893', 'step': 1323, 'epoch': 1} +{'type': 'pplx', 'content': 21127182.231462687, 'timestamp': '2025-09-10 02:38:38.848489', 'step': 1323, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:38:38.905350', 'step': 1323, 'epoch': 1} +{'type': 'loss', 'content': 0.011432381346821785, 'timestamp': '2025-09-10 02:38:38.916599', 'step': 1324, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:38:38.969748', 'step': 1324, 'epoch': 1} +{'type': 'loss', 'content': 0.021527279168367386, 'timestamp': '2025-09-10 02:38:38.979248', 'step': 1325, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:38:39.032831', 'step': 1325, 'epoch': 1} +{'type': 'loss', 'content': 0.005625096149742603, 'timestamp': '2025-09-10 02:38:39.040710', 'step': 1326, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:38:39.099669', 'step': 1326, 'epoch': 1} +{'type': 'loss', 'content': 0.018424388021230698, 'timestamp': '2025-09-10 02:38:39.110124', 'step': 1327, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:38:39.163479', 'step': 1327, 'epoch': 1} +{'type': 'loss', 'content': 0.010245404206216335, 'timestamp': '2025-09-10 02:38:39.169640', 'step': 1328, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:38:39.226121', 'step': 1328, 'epoch': 1} +{'type': 'loss', 'content': 0.026787420734763145, 'timestamp': '2025-09-10 02:38:39.227988', 'step': 1329, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:38:39.282499', 'step': 1329, 'epoch': 1} +{'type': 'loss', 'content': 0.0073931836523115635, 'timestamp': '2025-09-10 02:38:39.292309', 'step': 1330, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:38:39.353274', 'step': 1330, 'epoch': 1} +{'type': 'loss', 'content': 0.02826576866209507, 'timestamp': '2025-09-10 02:38:39.363759', 'step': 1331, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:38:39.417634', 'step': 1331, 'epoch': 1} +{'type': 'loss', 'content': 0.006794530898332596, 'timestamp': '2025-09-10 02:38:39.423989', 'step': 1332, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:38:39.478173', 'step': 1332, 'epoch': 1} +{'type': 'loss', 'content': 0.026355987414717674, 'timestamp': '2025-09-10 02:38:39.488223', 'step': 1333, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:38:39.541890', 'step': 1333, 'epoch': 1} +{'type': 'loss', 'content': 0.023742901161313057, 'timestamp': '2025-09-10 02:38:39.550036', 'step': 1334, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:38:39.610318', 'step': 1334, 'epoch': 1} +{'type': 'loss', 'content': 0.025258231908082962, 'timestamp': '2025-09-10 02:38:39.621059', 'step': 1335, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:38:39.679163', 'step': 1335, 'epoch': 1} +{'type': 'loss', 'content': 0.010027949698269367, 'timestamp': '2025-09-10 02:38:39.690379', 'step': 1336, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:38:39.743662', 'step': 1336, 'epoch': 1} +{'type': 'loss', 'content': 0.0029624791350215673, 'timestamp': '2025-09-10 02:38:39.745724', 'step': 1337, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:38:39.805317', 'step': 1337, 'epoch': 1} +{'type': 'loss', 'content': 0.006062061991542578, 'timestamp': '2025-09-10 02:38:39.807438', 'step': 1338, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:38:39.860548', 'step': 1338, 'epoch': 1} +{'type': 'loss', 'content': 0.023339737206697464, 'timestamp': '2025-09-10 02:38:39.862604', 'step': 1339, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:38:39.916303', 'step': 1339, 'epoch': 1} +{'type': 'loss', 'content': 0.011857804842293262, 'timestamp': '2025-09-10 02:38:39.926712', 'step': 1340, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:38:39.979518', 'step': 1340, 'epoch': 1} +{'type': 'loss', 'content': 0.0049182153306901455, 'timestamp': '2025-09-10 02:38:39.986213', 'step': 1341, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:38:40.041688', 'step': 1341, 'epoch': 1} +{'type': 'loss', 'content': 0.0034381321165710688, 'timestamp': '2025-09-10 02:38:40.048456', 'step': 1342, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:38:40.102040', 'step': 1342, 'epoch': 1} +{'type': 'loss', 'content': 0.008707774803042412, 'timestamp': '2025-09-10 02:38:40.104026', 'step': 1343, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:38:40.157687', 'step': 1343, 'epoch': 1} +{'type': 'loss', 'content': 0.00808065664023161, 'timestamp': '2025-09-10 02:38:40.168100', 'step': 1344, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:38:40.220159', 'step': 1344, 'epoch': 1} +{'type': 'loss', 'content': 0.03678285330533981, 'timestamp': '2025-09-10 02:38:40.222078', 'step': 1345, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:38:40.274560', 'step': 1345, 'epoch': 1} +{'type': 'loss', 'content': 0.008914128877222538, 'timestamp': '2025-09-10 02:38:40.283000', 'step': 1346, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:38:40.336229', 'step': 1346, 'epoch': 1} +{'type': 'loss', 'content': 0.0018418842228129506, 'timestamp': '2025-09-10 02:38:40.338165', 'step': 1347, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:38:40.391326', 'step': 1347, 'epoch': 1} +{'type': 'loss', 'content': 0.011227915063500404, 'timestamp': '2025-09-10 02:38:40.396809', 'step': 1348, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:38:40.449601', 'step': 1348, 'epoch': 1} +{'type': 'loss', 'content': 0.0015851123025640845, 'timestamp': '2025-09-10 02:38:40.456051', 'step': 1349, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:38:40.509949', 'step': 1349, 'epoch': 1} +{'type': 'loss', 'content': 0.006693967618048191, 'timestamp': '2025-09-10 02:38:40.511917', 'step': 1350, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:38:40.566108', 'step': 1350, 'epoch': 1} +{'type': 'loss', 'content': 0.026232196018099785, 'timestamp': '2025-09-10 02:38:40.575906', 'step': 1351, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-09-10 02:38:40.637344', 'step': 1351, 'epoch': 1} +{'type': 'loss', 'content': 0.006545294541865587, 'timestamp': '2025-09-10 02:38:40.649037', 'step': 1352, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:38:40.703453', 'step': 1352, 'epoch': 1} +{'type': 'loss', 'content': 0.012144649401307106, 'timestamp': '2025-09-10 02:38:40.713975', 'step': 1353, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:38:40.767520', 'step': 1353, 'epoch': 1} +{'type': 'loss', 'content': 0.011797365732491016, 'timestamp': '2025-09-10 02:38:40.773991', 'step': 1354, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:38:40.827381', 'step': 1354, 'epoch': 1} +{'type': 'loss', 'content': 0.004145797807723284, 'timestamp': '2025-09-10 02:38:40.829317', 'step': 1355, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:38:40.882476', 'step': 1355, 'epoch': 1} +{'type': 'loss', 'content': 0.052191197872161865, 'timestamp': '2025-09-10 02:38:40.888491', 'step': 1356, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-09-10 02:38:40.947686', 'step': 1356, 'epoch': 1} +{'type': 'loss', 'content': 0.026247352361679077, 'timestamp': '2025-09-10 02:38:40.959452', 'step': 1357, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:38:41.013898', 'step': 1357, 'epoch': 1} +{'type': 'loss', 'content': 0.003235304495319724, 'timestamp': '2025-09-10 02:38:41.015819', 'step': 1358, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:38:41.070586', 'step': 1358, 'epoch': 1} +{'type': 'loss', 'content': 0.010302268899977207, 'timestamp': '2025-09-10 02:38:41.080400', 'step': 1359, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:38:41.133891', 'step': 1359, 'epoch': 1} +{'type': 'loss', 'content': 0.00330765126273036, 'timestamp': '2025-09-10 02:38:41.139543', 'step': 1360, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:38:41.192648', 'step': 1360, 'epoch': 1} +{'type': 'loss', 'content': 0.008435637690126896, 'timestamp': '2025-09-10 02:38:41.198465', 'step': 1361, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:38:41.252867', 'step': 1361, 'epoch': 1} +{'type': 'loss', 'content': 0.01547850389033556, 'timestamp': '2025-09-10 02:38:41.262650', 'step': 1362, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:38:41.325952', 'step': 1362, 'epoch': 1} +{'type': 'loss', 'content': 0.031536929309368134, 'timestamp': '2025-09-10 02:38:41.336713', 'step': 1363, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:38:41.397479', 'step': 1363, 'epoch': 1} +{'type': 'loss', 'content': 0.003552355570718646, 'timestamp': '2025-09-10 02:38:41.404390', 'step': 1364, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-09-10 02:38:41.475978', 'step': 1364, 'epoch': 1} +{'type': 'loss', 'content': 0.002945947926491499, 'timestamp': '2025-09-10 02:38:41.487804', 'step': 1365, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:38:41.555636', 'step': 1365, 'epoch': 1} +{'type': 'loss', 'content': 0.025460409000515938, 'timestamp': '2025-09-10 02:38:41.561901', 'step': 1366, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:38:41.617729', 'step': 1366, 'epoch': 1} +{'type': 'loss', 'content': 0.04281119629740715, 'timestamp': '2025-09-10 02:38:41.620380', 'step': 1367, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:38:41.674635', 'step': 1367, 'epoch': 1} +{'type': 'loss', 'content': 0.05582251399755478, 'timestamp': '2025-09-10 02:38:41.683307', 'step': 1368, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:38:41.739006', 'step': 1368, 'epoch': 1} +{'type': 'loss', 'content': 0.013911671936511993, 'timestamp': '2025-09-10 02:38:41.741881', 'step': 1369, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:38:41.804215', 'step': 1369, 'epoch': 1} +{'type': 'loss', 'content': 0.03550497815012932, 'timestamp': '2025-09-10 02:38:41.814425', 'step': 1370, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:38:41.883785', 'step': 1370, 'epoch': 1} +{'type': 'loss', 'content': 0.022621911019086838, 'timestamp': '2025-09-10 02:38:41.889585', 'step': 1371, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:38:41.964496', 'step': 1371, 'epoch': 1} +{'type': 'loss', 'content': 0.03409972041845322, 'timestamp': '2025-09-10 02:38:41.975760', 'step': 1372, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:38:42.031319', 'step': 1372, 'epoch': 1} +{'type': 'loss', 'content': 0.010691115632653236, 'timestamp': '2025-09-10 02:38:42.038071', 'step': 1373, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:38:42.092892', 'step': 1373, 'epoch': 1} +{'type': 'loss', 'content': 0.025426147505640984, 'timestamp': '2025-09-10 02:38:42.097110', 'step': 1374, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:38:42.153376', 'step': 1374, 'epoch': 1} +{'type': 'loss', 'content': 0.013123149052262306, 'timestamp': '2025-09-10 02:38:42.156693', 'step': 1375, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:38:42.212824', 'step': 1375, 'epoch': 1} +{'type': 'loss', 'content': 0.009240327402949333, 'timestamp': '2025-09-10 02:38:42.221340', 'step': 1376, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:38:42.276791', 'step': 1376, 'epoch': 1} +{'type': 'loss', 'content': 0.005370268132537603, 'timestamp': '2025-09-10 02:38:42.279578', 'step': 1377, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:38:42.334280', 'step': 1377, 'epoch': 1} +{'type': 'loss', 'content': 0.0016543889651075006, 'timestamp': '2025-09-10 02:38:42.340883', 'step': 1378, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:38:42.400070', 'step': 1378, 'epoch': 1} +{'type': 'loss', 'content': 0.034547124058008194, 'timestamp': '2025-09-10 02:38:42.411788', 'step': 1379, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 624], 'flops': 12480075828672.0}, 'timestamp': '2025-09-10 02:38:42.515223', 'step': 1379, 'epoch': 1} +{'type': 'loss', 'content': 0.01084803231060505, 'timestamp': '2025-09-10 02:38:42.533321', 'step': 1380, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:38:42.592123', 'step': 1380, 'epoch': 1} +{'type': 'loss', 'content': 0.008962323889136314, 'timestamp': '2025-09-10 02:38:42.598475', 'step': 1381, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:38:42.663984', 'step': 1381, 'epoch': 1} +{'type': 'loss', 'content': 0.0072692567482590675, 'timestamp': '2025-09-10 02:38:42.667489', 'step': 1382, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 480], 'flops': 9600058345344.0}, 'timestamp': '2025-09-10 02:38:42.750598', 'step': 1382, 'epoch': 1} +{'type': 'loss', 'content': 0.020620524883270264, 'timestamp': '2025-09-10 02:38:42.764289', 'step': 1383, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:38:42.823775', 'step': 1383, 'epoch': 1} +{'type': 'loss', 'content': 0.019325902685523033, 'timestamp': '2025-09-10 02:38:42.834332', 'step': 1384, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:38:42.889555', 'step': 1384, 'epoch': 1} +{'type': 'loss', 'content': 0.010693066753447056, 'timestamp': '2025-09-10 02:38:42.891779', 'step': 1385, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:38:42.957093', 'step': 1385, 'epoch': 1} +{'type': 'loss', 'content': 0.01635764166712761, 'timestamp': '2025-09-10 02:38:42.960885', 'step': 1386, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:38:43.024361', 'step': 1386, 'epoch': 1} +{'type': 'loss', 'content': 0.019610950723290443, 'timestamp': '2025-09-10 02:38:43.031730', 'step': 1387, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:38:43.087319', 'step': 1387, 'epoch': 1} +{'type': 'loss', 'content': 0.021304180845618248, 'timestamp': '2025-09-10 02:38:43.093205', 'step': 1388, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:38:43.148937', 'step': 1388, 'epoch': 1} +{'type': 'loss', 'content': 0.022610317915678024, 'timestamp': '2025-09-10 02:38:43.150982', 'step': 1389, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:38:43.204180', 'step': 1389, 'epoch': 1} +{'type': 'loss', 'content': 0.009096337482333183, 'timestamp': '2025-09-10 02:38:43.206113', 'step': 1390, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:38:43.259266', 'step': 1390, 'epoch': 1} +{'type': 'loss', 'content': 0.006592115852981806, 'timestamp': '2025-09-10 02:38:43.261515', 'step': 1391, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:38:43.314969', 'step': 1391, 'epoch': 1} +{'type': 'loss', 'content': 0.01335006020963192, 'timestamp': '2025-09-10 02:38:43.320807', 'step': 1392, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:38:43.373808', 'step': 1392, 'epoch': 1} +{'type': 'loss', 'content': 0.019167417660355568, 'timestamp': '2025-09-10 02:38:43.375989', 'step': 1393, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:38:43.430965', 'step': 1393, 'epoch': 1} +{'type': 'loss', 'content': 0.015288623049855232, 'timestamp': '2025-09-10 02:38:43.440701', 'step': 1394, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-09-10 02:38:43.509661', 'step': 1394, 'epoch': 1} +{'type': 'loss', 'content': 0.005163759924471378, 'timestamp': '2025-09-10 02:38:43.522148', 'step': 1395, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:38:43.576547', 'step': 1395, 'epoch': 1} +{'type': 'loss', 'content': 0.010330254212021828, 'timestamp': '2025-09-10 02:38:43.583066', 'step': 1396, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:38:43.635902', 'step': 1396, 'epoch': 1} +{'type': 'loss', 'content': 0.011865033768117428, 'timestamp': '2025-09-10 02:38:43.638683', 'step': 1397, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:38:43.691774', 'step': 1397, 'epoch': 1} +{'type': 'loss', 'content': 0.016444692388176918, 'timestamp': '2025-09-10 02:38:43.693981', 'step': 1398, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:38:43.746753', 'step': 1398, 'epoch': 1} +{'type': 'loss', 'content': 0.005546797998249531, 'timestamp': '2025-09-10 02:38:43.748836', 'step': 1399, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:38:43.801957', 'step': 1399, 'epoch': 1} +{'type': 'loss', 'content': 0.014152840711176395, 'timestamp': '2025-09-10 02:38:43.808037', 'step': 1400, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:38:43.860395', 'step': 1400, 'epoch': 1} +{'type': 'loss', 'content': 0.00697662029415369, 'timestamp': '2025-09-10 02:38:43.862314', 'step': 1401, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:38:43.915506', 'step': 1401, 'epoch': 1} +{'type': 'loss', 'content': 0.00883783120661974, 'timestamp': '2025-09-10 02:38:43.918007', 'step': 1402, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:38:43.970843', 'step': 1402, 'epoch': 1} +{'type': 'loss', 'content': 0.0286136232316494, 'timestamp': '2025-09-10 02:38:43.977211', 'step': 1403, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:38:44.030119', 'step': 1403, 'epoch': 1} +{'type': 'loss', 'content': 0.03485722467303276, 'timestamp': '2025-09-10 02:38:44.036031', 'step': 1404, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 608], 'flops': 12160073886080.0}, 'timestamp': '2025-09-10 02:38:44.124002', 'step': 1404, 'epoch': 1} +{'type': 'loss', 'content': 0.029262211173772812, 'timestamp': '2025-09-10 02:38:44.142716', 'step': 1405, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:38:44.196660', 'step': 1405, 'epoch': 1} +{'type': 'loss', 'content': 0.004903446417301893, 'timestamp': '2025-09-10 02:38:44.198840', 'step': 1406, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:38:44.253381', 'step': 1406, 'epoch': 1} +{'type': 'loss', 'content': 0.007456667721271515, 'timestamp': '2025-09-10 02:38:44.255301', 'step': 1407, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:38:44.308243', 'step': 1407, 'epoch': 1} +{'type': 'loss', 'content': 0.010532831773161888, 'timestamp': '2025-09-10 02:38:44.314077', 'step': 1408, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:38:44.366642', 'step': 1408, 'epoch': 1} +{'type': 'loss', 'content': 0.020802823826670647, 'timestamp': '2025-09-10 02:38:44.368893', 'step': 1409, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:38:44.422932', 'step': 1409, 'epoch': 1} +{'type': 'loss', 'content': 0.02434101514518261, 'timestamp': '2025-09-10 02:38:44.426192', 'step': 1410, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:38:44.479712', 'step': 1410, 'epoch': 1} +{'type': 'loss', 'content': 0.014293080195784569, 'timestamp': '2025-09-10 02:38:44.482678', 'step': 1411, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:38:44.535776', 'step': 1411, 'epoch': 1} +{'type': 'loss', 'content': 0.03320017457008362, 'timestamp': '2025-09-10 02:38:44.541568', 'step': 1412, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:38:44.593998', 'step': 1412, 'epoch': 1} +{'type': 'loss', 'content': 0.025716817006468773, 'timestamp': '2025-09-10 02:38:44.595969', 'step': 1413, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:38:44.648682', 'step': 1413, 'epoch': 1} +{'type': 'loss', 'content': 0.008264877833425999, 'timestamp': '2025-09-10 02:38:44.651056', 'step': 1414, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:38:44.704272', 'step': 1414, 'epoch': 1} +{'type': 'loss', 'content': 0.01690601371228695, 'timestamp': '2025-09-10 02:38:44.707370', 'step': 1415, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:38:44.760953', 'step': 1415, 'epoch': 1} +{'type': 'loss', 'content': 0.008486853912472725, 'timestamp': '2025-09-10 02:38:44.771182', 'step': 1416, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:38:44.825214', 'step': 1416, 'epoch': 1} +{'type': 'loss', 'content': 0.013896196149289608, 'timestamp': '2025-09-10 02:38:44.827113', 'step': 1417, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:38:44.880160', 'step': 1417, 'epoch': 1} +{'type': 'loss', 'content': 0.02865840122103691, 'timestamp': '2025-09-10 02:38:44.882280', 'step': 1418, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:38:44.935890', 'step': 1418, 'epoch': 1} +{'type': 'loss', 'content': 0.00812804326415062, 'timestamp': '2025-09-10 02:38:44.943814', 'step': 1419, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:38:44.996775', 'step': 1419, 'epoch': 1} +{'type': 'loss', 'content': 0.00727180065587163, 'timestamp': '2025-09-10 02:38:45.003571', 'step': 1420, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:38:45.055310', 'step': 1420, 'epoch': 1} +{'type': 'loss', 'content': 0.007957945577800274, 'timestamp': '2025-09-10 02:38:45.058411', 'step': 1421, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:38:45.112729', 'step': 1421, 'epoch': 1} +{'type': 'loss', 'content': 0.02688734047114849, 'timestamp': '2025-09-10 02:38:45.115101', 'step': 1422, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:38:45.168890', 'step': 1422, 'epoch': 1} +{'type': 'loss', 'content': 0.05237075686454773, 'timestamp': '2025-09-10 02:38:45.170978', 'step': 1423, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:38:45.224712', 'step': 1423, 'epoch': 1} +{'type': 'loss', 'content': 0.015660319477319717, 'timestamp': '2025-09-10 02:38:45.230725', 'step': 1424, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:38:45.283283', 'step': 1424, 'epoch': 1} +{'type': 'loss', 'content': 0.0025016695726662874, 'timestamp': '2025-09-10 02:38:45.285497', 'step': 1425, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:38:45.338341', 'step': 1425, 'epoch': 1} +{'type': 'loss', 'content': 0.016082679852843285, 'timestamp': '2025-09-10 02:38:45.340288', 'step': 1426, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:38:45.393394', 'step': 1426, 'epoch': 1} +{'type': 'loss', 'content': 0.01544844452291727, 'timestamp': '2025-09-10 02:38:45.403037', 'step': 1427, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:38:45.456029', 'step': 1427, 'epoch': 1} +{'type': 'loss', 'content': 0.01196068525314331, 'timestamp': '2025-09-10 02:38:45.461700', 'step': 1428, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:38:45.513865', 'step': 1428, 'epoch': 1} +{'type': 'loss', 'content': 0.02642877586185932, 'timestamp': '2025-09-10 02:38:45.515772', 'step': 1429, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:38:45.568290', 'step': 1429, 'epoch': 1} +{'type': 'loss', 'content': 0.01701239123940468, 'timestamp': '2025-09-10 02:38:45.570344', 'step': 1430, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 656], 'flops': 13120079713856.0}, 'timestamp': '2025-09-10 02:38:45.666795', 'step': 1430, 'epoch': 1} +{'type': 'loss', 'content': 0.014857952482998371, 'timestamp': '2025-09-10 02:38:45.685328', 'step': 1431, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:38:45.738918', 'step': 1431, 'epoch': 1} +{'type': 'loss', 'content': 0.024639811366796494, 'timestamp': '2025-09-10 02:38:45.745009', 'step': 1432, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:38:45.798548', 'step': 1432, 'epoch': 1} +{'type': 'loss', 'content': 0.027349967509508133, 'timestamp': '2025-09-10 02:38:45.800634', 'step': 1433, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:38:45.854159', 'step': 1433, 'epoch': 1} +{'type': 'loss', 'content': 0.023200364783406258, 'timestamp': '2025-09-10 02:38:45.856319', 'step': 1434, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:38:45.910691', 'step': 1434, 'epoch': 1} +{'type': 'loss', 'content': 0.010355520062148571, 'timestamp': '2025-09-10 02:38:45.917579', 'step': 1435, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:38:45.972234', 'step': 1435, 'epoch': 1} +{'type': 'loss', 'content': 0.009227151982486248, 'timestamp': '2025-09-10 02:38:45.979473', 'step': 1436, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:38:46.032272', 'step': 1436, 'epoch': 1} +{'type': 'loss', 'content': 0.010424159467220306, 'timestamp': '2025-09-10 02:38:46.034531', 'step': 1437, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:38:46.088130', 'step': 1437, 'epoch': 1} +{'type': 'loss', 'content': 0.01785074546933174, 'timestamp': '2025-09-10 02:38:46.090568', 'step': 1438, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:38:46.145242', 'step': 1438, 'epoch': 1} +{'type': 'loss', 'content': 0.009487361647188663, 'timestamp': '2025-09-10 02:38:46.147263', 'step': 1439, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:38:46.200582', 'step': 1439, 'epoch': 1} +{'type': 'loss', 'content': 0.009929628111422062, 'timestamp': '2025-09-10 02:38:46.207919', 'step': 1440, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:38:46.260240', 'step': 1440, 'epoch': 1} +{'type': 'loss', 'content': 0.009848659858107567, 'timestamp': '2025-09-10 02:38:46.268460', 'step': 1441, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:38:46.322624', 'step': 1441, 'epoch': 1} +{'type': 'loss', 'content': 0.01061181165277958, 'timestamp': '2025-09-10 02:38:46.325009', 'step': 1442, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:38:46.378347', 'step': 1442, 'epoch': 1} +{'type': 'loss', 'content': 0.012384439818561077, 'timestamp': '2025-09-10 02:38:46.380457', 'step': 1443, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:38:46.433725', 'step': 1443, 'epoch': 1} +{'type': 'loss', 'content': 0.014429338276386261, 'timestamp': '2025-09-10 02:38:46.439619', 'step': 1444, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:38:46.491949', 'step': 1444, 'epoch': 1} +{'type': 'loss', 'content': 0.0038031567819416523, 'timestamp': '2025-09-10 02:38:46.494063', 'step': 1445, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:38:46.547893', 'step': 1445, 'epoch': 1} +{'type': 'loss', 'content': 0.008766873739659786, 'timestamp': '2025-09-10 02:38:46.549923', 'step': 1446, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:38:46.603260', 'step': 1446, 'epoch': 1} +{'type': 'loss', 'content': 0.011563843116164207, 'timestamp': '2025-09-10 02:38:46.609676', 'step': 1447, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-09-10 02:38:46.678116', 'step': 1447, 'epoch': 1} +{'type': 'loss', 'content': 0.014462672173976898, 'timestamp': '2025-09-10 02:38:46.691494', 'step': 1448, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:38:46.745088', 'step': 1448, 'epoch': 1} +{'type': 'loss', 'content': 0.009715719148516655, 'timestamp': '2025-09-10 02:38:46.755591', 'step': 1449, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:38:46.809121', 'step': 1449, 'epoch': 1} +{'type': 'loss', 'content': 0.012340018525719643, 'timestamp': '2025-09-10 02:38:46.815190', 'step': 1450, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:38:46.868235', 'step': 1450, 'epoch': 1} +{'type': 'loss', 'content': 0.00776516692712903, 'timestamp': '2025-09-10 02:38:46.870326', 'step': 1451, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:38:46.924031', 'step': 1451, 'epoch': 1} +{'type': 'loss', 'content': 0.015312569215893745, 'timestamp': '2025-09-10 02:38:46.930721', 'step': 1452, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:38:46.983891', 'step': 1452, 'epoch': 1} +{'type': 'loss', 'content': 0.01902756094932556, 'timestamp': '2025-09-10 02:38:46.986056', 'step': 1453, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:38:47.040782', 'step': 1453, 'epoch': 1} +{'type': 'loss', 'content': 0.013507343828678131, 'timestamp': '2025-09-10 02:38:47.050588', 'step': 1454, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:38:47.104232', 'step': 1454, 'epoch': 1} +{'type': 'loss', 'content': 0.0173882395029068, 'timestamp': '2025-09-10 02:38:47.112223', 'step': 1455, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:38:47.165319', 'step': 1455, 'epoch': 1} +{'type': 'loss', 'content': 0.010136603377759457, 'timestamp': '2025-09-10 02:38:47.171045', 'step': 1456, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:38:47.223749', 'step': 1456, 'epoch': 1} +{'type': 'loss', 'content': 0.01279307622462511, 'timestamp': '2025-09-10 02:38:47.231499', 'step': 1457, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:38:47.285328', 'step': 1457, 'epoch': 1} +{'type': 'loss', 'content': 0.010654272511601448, 'timestamp': '2025-09-10 02:38:47.287262', 'step': 1458, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:38:47.340376', 'step': 1458, 'epoch': 1} +{'type': 'loss', 'content': 0.011478336527943611, 'timestamp': '2025-09-10 02:38:47.342443', 'step': 1459, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:38:47.396004', 'step': 1459, 'epoch': 1} +{'type': 'loss', 'content': 0.03447409346699715, 'timestamp': '2025-09-10 02:38:47.401916', 'step': 1460, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:38:47.454169', 'step': 1460, 'epoch': 1} +{'type': 'loss', 'content': 0.007755897007882595, 'timestamp': '2025-09-10 02:38:47.456112', 'step': 1461, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:38:47.509116', 'step': 1461, 'epoch': 1} +{'type': 'loss', 'content': 0.017004257068037987, 'timestamp': '2025-09-10 02:38:47.511854', 'step': 1462, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:38:47.564823', 'step': 1462, 'epoch': 1} +{'type': 'loss', 'content': 0.024744782596826553, 'timestamp': '2025-09-10 02:38:47.572785', 'step': 1463, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:38:47.626665', 'step': 1463, 'epoch': 1} +{'type': 'loss', 'content': 0.010759426280856133, 'timestamp': '2025-09-10 02:38:47.633267', 'step': 1464, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:38:47.691053', 'step': 1464, 'epoch': 1} +{'type': 'loss', 'content': 0.034266144037246704, 'timestamp': '2025-09-10 02:38:47.693184', 'step': 1465, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 480], 'flops': 9600058345344.0}, 'timestamp': '2025-09-10 02:38:47.767127', 'step': 1465, 'epoch': 1} +{'type': 'loss', 'content': 0.028431007638573647, 'timestamp': '2025-09-10 02:38:47.780824', 'step': 1466, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-09-10 02:38:47.848015', 'step': 1466, 'epoch': 1} +{'type': 'loss', 'content': 0.017007270827889442, 'timestamp': '2025-09-10 02:38:47.860188', 'step': 1467, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:38:47.914786', 'step': 1467, 'epoch': 1} +{'type': 'loss', 'content': 0.008189934305846691, 'timestamp': '2025-09-10 02:38:47.920735', 'step': 1468, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:38:47.975536', 'step': 1468, 'epoch': 1} +{'type': 'loss', 'content': 0.020122379064559937, 'timestamp': '2025-09-10 02:38:47.986031', 'step': 1469, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 464], 'flops': 9280056402752.0}, 'timestamp': '2025-09-10 02:38:48.058955', 'step': 1469, 'epoch': 1} +{'type': 'loss', 'content': 0.027661120519042015, 'timestamp': '2025-09-10 02:38:48.072415', 'step': 1470, 'epoch': 1} +{'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 736], 'batch_size': 8, 'flops': 14691612894976}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10859018244352}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 624], 'batch_size': 8, 'flops': 12455932682112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11497784019456}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8623338031488}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8623338031488}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10859018244352}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 656], 'batch_size': 8, 'flops': 13094698457216}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 560], 'batch_size': 8, 'flops': 11178401131904}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11497784019456}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 1168], 'batch_size': 8, 'flops': 23314950858880}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 640], 'batch_size': 8, 'flops': 12775315569664}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [6, 208], 'batch_size': 8, 'flops': 4151977605760}], 'timestamp': '2025-09-10 02:39:04.929026', 'step': 1470, 'epoch': 1} +{'type': 'pplx', 'content': 21173319.91827306, 'timestamp': '2025-09-10 02:39:04.932880', 'step': 1470, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:39:04.993661', 'step': 1470, 'epoch': 1} +{'type': 'loss', 'content': 0.022806530818343163, 'timestamp': '2025-09-10 02:39:04.998657', 'step': 1471, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 448], 'flops': 8960054460160.0}, 'timestamp': '2025-09-10 02:39:05.068544', 'step': 1471, 'epoch': 1} +{'type': 'loss', 'content': 0.018918577581644058, 'timestamp': '2025-09-10 02:39:05.081934', 'step': 1472, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:39:05.137273', 'step': 1472, 'epoch': 1} +{'type': 'loss', 'content': 0.004286808427423239, 'timestamp': '2025-09-10 02:39:05.139591', 'step': 1473, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:39:05.192617', 'step': 1473, 'epoch': 1} +{'type': 'loss', 'content': 0.018037041649222374, 'timestamp': '2025-09-10 02:39:05.200378', 'step': 1474, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:39:05.255050', 'step': 1474, 'epoch': 1} +{'type': 'loss', 'content': 0.007983874529600143, 'timestamp': '2025-09-10 02:39:05.264892', 'step': 1475, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:39:05.318467', 'step': 1475, 'epoch': 1} +{'type': 'loss', 'content': 0.018270786851644516, 'timestamp': '2025-09-10 02:39:05.328878', 'step': 1476, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:39:05.385765', 'step': 1476, 'epoch': 1} +{'type': 'loss', 'content': 0.039264630526304245, 'timestamp': '2025-09-10 02:39:05.388058', 'step': 1477, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:39:05.446577', 'step': 1477, 'epoch': 1} +{'type': 'loss', 'content': 0.011250613257288933, 'timestamp': '2025-09-10 02:39:05.456202', 'step': 1478, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:39:05.510046', 'step': 1478, 'epoch': 1} +{'type': 'loss', 'content': 0.013807500712573528, 'timestamp': '2025-09-10 02:39:05.512074', 'step': 1479, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:39:05.565778', 'step': 1479, 'epoch': 1} +{'type': 'loss', 'content': 0.005317857023328543, 'timestamp': '2025-09-10 02:39:05.576176', 'step': 1480, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:39:05.628903', 'step': 1480, 'epoch': 1} +{'type': 'loss', 'content': 0.006169492844492197, 'timestamp': '2025-09-10 02:39:05.637427', 'step': 1481, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:39:05.696385', 'step': 1481, 'epoch': 1} +{'type': 'loss', 'content': 0.007268204353749752, 'timestamp': '2025-09-10 02:39:05.704041', 'step': 1482, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:39:05.759583', 'step': 1482, 'epoch': 1} +{'type': 'loss', 'content': 0.0052519855089485645, 'timestamp': '2025-09-10 02:39:05.769153', 'step': 1483, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:39:05.822324', 'step': 1483, 'epoch': 1} +{'type': 'loss', 'content': 0.002324948785826564, 'timestamp': '2025-09-10 02:39:05.828432', 'step': 1484, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:39:05.881169', 'step': 1484, 'epoch': 1} +{'type': 'loss', 'content': 0.014736413955688477, 'timestamp': '2025-09-10 02:39:05.886820', 'step': 1485, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:39:05.942933', 'step': 1485, 'epoch': 1} +{'type': 'loss', 'content': 0.007762397173792124, 'timestamp': '2025-09-10 02:39:05.944976', 'step': 1486, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:39:05.999591', 'step': 1486, 'epoch': 1} +{'type': 'loss', 'content': 0.007638792973011732, 'timestamp': '2025-09-10 02:39:06.001888', 'step': 1487, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:39:06.054626', 'step': 1487, 'epoch': 1} +{'type': 'loss', 'content': 0.020237712189555168, 'timestamp': '2025-09-10 02:39:06.062955', 'step': 1488, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:39:06.115814', 'step': 1488, 'epoch': 1} +{'type': 'loss', 'content': 0.005117752123624086, 'timestamp': '2025-09-10 02:39:06.121374', 'step': 1489, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-09-10 02:39:06.187026', 'step': 1489, 'epoch': 1} +{'type': 'loss', 'content': 0.0018134128767997026, 'timestamp': '2025-09-10 02:39:06.198103', 'step': 1490, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:39:06.251161', 'step': 1490, 'epoch': 1} +{'type': 'loss', 'content': 0.006517014931887388, 'timestamp': '2025-09-10 02:39:06.253516', 'step': 1491, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:39:06.306640', 'step': 1491, 'epoch': 1} +{'type': 'loss', 'content': 0.018086465075612068, 'timestamp': '2025-09-10 02:39:06.312842', 'step': 1492, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:39:06.366823', 'step': 1492, 'epoch': 1} +{'type': 'loss', 'content': 0.024628931656479836, 'timestamp': '2025-09-10 02:39:06.372224', 'step': 1493, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:39:06.426167', 'step': 1493, 'epoch': 1} +{'type': 'loss', 'content': 0.01478238869458437, 'timestamp': '2025-09-10 02:39:06.428663', 'step': 1494, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:39:06.485621', 'step': 1494, 'epoch': 1} +{'type': 'loss', 'content': 0.020374033600091934, 'timestamp': '2025-09-10 02:39:06.488179', 'step': 1495, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:39:06.542787', 'step': 1495, 'epoch': 1} +{'type': 'loss', 'content': 0.0061873276717960835, 'timestamp': '2025-09-10 02:39:06.549333', 'step': 1496, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:39:06.603791', 'step': 1496, 'epoch': 1} +{'type': 'loss', 'content': 0.014341110363602638, 'timestamp': '2025-09-10 02:39:06.612304', 'step': 1497, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:39:06.677774', 'step': 1497, 'epoch': 1} +{'type': 'loss', 'content': 0.03801755979657173, 'timestamp': '2025-09-10 02:39:06.679765', 'step': 1498, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:39:06.732843', 'step': 1498, 'epoch': 1} +{'type': 'loss', 'content': 0.0012635404709726572, 'timestamp': '2025-09-10 02:39:06.734899', 'step': 1499, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:39:06.788983', 'step': 1499, 'epoch': 1} +{'type': 'loss', 'content': 0.02247563563287258, 'timestamp': '2025-09-10 02:39:06.795243', 'step': 1500, 'epoch': 1} +{'type': 'info', 'content': 'Checkpoint saved at step 1500', 'timestamp': '2025-09-10 02:39:07.166577', 'step': 1500, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:39:07.222639', 'step': 1500, 'epoch': 1} +{'type': 'loss', 'content': 0.012357601895928383, 'timestamp': '2025-09-10 02:39:07.224899', 'step': 1501, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:39:07.280752', 'step': 1501, 'epoch': 1} +{'type': 'loss', 'content': 0.03305893391370773, 'timestamp': '2025-09-10 02:39:07.284600', 'step': 1502, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:39:07.339647', 'step': 1502, 'epoch': 1} +{'type': 'loss', 'content': 0.006188906729221344, 'timestamp': '2025-09-10 02:39:07.341993', 'step': 1503, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 480], 'flops': 9600058345344.0}, 'timestamp': '2025-09-10 02:39:07.415637', 'step': 1503, 'epoch': 1} +{'type': 'loss', 'content': 0.0038431433495134115, 'timestamp': '2025-09-10 02:39:07.430043', 'step': 1504, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:39:07.495701', 'step': 1504, 'epoch': 1} +{'type': 'loss', 'content': 0.027286021038889885, 'timestamp': '2025-09-10 02:39:07.497949', 'step': 1505, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:39:07.553504', 'step': 1505, 'epoch': 1} +{'type': 'loss', 'content': 0.029850173741579056, 'timestamp': '2025-09-10 02:39:07.555623', 'step': 1506, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:39:07.608180', 'step': 1506, 'epoch': 1} +{'type': 'loss', 'content': 0.019115885719656944, 'timestamp': '2025-09-10 02:39:07.610304', 'step': 1507, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-09-10 02:39:07.678674', 'step': 1507, 'epoch': 1} +{'type': 'loss', 'content': 0.008542048744857311, 'timestamp': '2025-09-10 02:39:07.692159', 'step': 1508, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:39:07.791964', 'step': 1508, 'epoch': 1} +{'type': 'loss', 'content': 0.011884269304573536, 'timestamp': '2025-09-10 02:39:07.799957', 'step': 1509, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:39:07.852964', 'step': 1509, 'epoch': 1} +{'type': 'loss', 'content': 0.008217772468924522, 'timestamp': '2025-09-10 02:39:07.856834', 'step': 1510, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:39:07.910810', 'step': 1510, 'epoch': 1} +{'type': 'loss', 'content': 0.011131289415061474, 'timestamp': '2025-09-10 02:39:07.913061', 'step': 1511, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:39:07.966523', 'step': 1511, 'epoch': 1} +{'type': 'loss', 'content': 0.011722886934876442, 'timestamp': '2025-09-10 02:39:07.972634', 'step': 1512, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:39:08.025168', 'step': 1512, 'epoch': 1} +{'type': 'loss', 'content': 0.011907918378710747, 'timestamp': '2025-09-10 02:39:08.027446', 'step': 1513, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:39:08.084439', 'step': 1513, 'epoch': 1} +{'type': 'loss', 'content': 0.008978028781712055, 'timestamp': '2025-09-10 02:39:08.086830', 'step': 1514, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:39:08.139448', 'step': 1514, 'epoch': 1} +{'type': 'loss', 'content': 0.00045794612378813326, 'timestamp': '2025-09-10 02:39:08.141892', 'step': 1515, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:39:08.196005', 'step': 1515, 'epoch': 1} +{'type': 'loss', 'content': 0.011565404012799263, 'timestamp': '2025-09-10 02:39:08.206381', 'step': 1516, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:39:08.265451', 'step': 1516, 'epoch': 1} +{'type': 'loss', 'content': 0.01076526939868927, 'timestamp': '2025-09-10 02:39:08.270835', 'step': 1517, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:39:08.329067', 'step': 1517, 'epoch': 1} +{'type': 'loss', 'content': 0.0018057377310469747, 'timestamp': '2025-09-10 02:39:08.335580', 'step': 1518, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:39:08.399503', 'step': 1518, 'epoch': 1} +{'type': 'loss', 'content': 0.014335406012833118, 'timestamp': '2025-09-10 02:39:08.409284', 'step': 1519, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:39:08.462729', 'step': 1519, 'epoch': 1} +{'type': 'loss', 'content': 0.009265930391848087, 'timestamp': '2025-09-10 02:39:08.468642', 'step': 1520, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:39:08.520442', 'step': 1520, 'epoch': 1} +{'type': 'loss', 'content': 0.008001809939742088, 'timestamp': '2025-09-10 02:39:08.523454', 'step': 1521, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:39:08.576088', 'step': 1521, 'epoch': 1} +{'type': 'loss', 'content': 0.0332258865237236, 'timestamp': '2025-09-10 02:39:08.578634', 'step': 1522, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:39:08.632707', 'step': 1522, 'epoch': 1} +{'type': 'loss', 'content': 0.023234251886606216, 'timestamp': '2025-09-10 02:39:08.635230', 'step': 1523, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:39:08.689217', 'step': 1523, 'epoch': 1} +{'type': 'loss', 'content': 0.002109512919560075, 'timestamp': '2025-09-10 02:39:08.694950', 'step': 1524, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:39:08.747772', 'step': 1524, 'epoch': 1} +{'type': 'loss', 'content': 0.014900286681950092, 'timestamp': '2025-09-10 02:39:08.750635', 'step': 1525, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:39:08.803372', 'step': 1525, 'epoch': 1} +{'type': 'loss', 'content': 0.007958031259477139, 'timestamp': '2025-09-10 02:39:08.805541', 'step': 1526, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-09-10 02:39:08.873478', 'step': 1526, 'epoch': 1} +{'type': 'loss', 'content': 0.0024172370322048664, 'timestamp': '2025-09-10 02:39:08.886012', 'step': 1527, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:39:08.940427', 'step': 1527, 'epoch': 1} +{'type': 'loss', 'content': 0.004101971630007029, 'timestamp': '2025-09-10 02:39:08.950830', 'step': 1528, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:39:09.003198', 'step': 1528, 'epoch': 1} +{'type': 'loss', 'content': 0.01997605338692665, 'timestamp': '2025-09-10 02:39:09.005446', 'step': 1529, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:39:09.058375', 'step': 1529, 'epoch': 1} +{'type': 'loss', 'content': 0.0052584256045520306, 'timestamp': '2025-09-10 02:39:09.060380', 'step': 1530, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:39:09.113259', 'step': 1530, 'epoch': 1} +{'type': 'loss', 'content': 0.009853395633399487, 'timestamp': '2025-09-10 02:39:09.121363', 'step': 1531, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:39:09.175020', 'step': 1531, 'epoch': 1} +{'type': 'loss', 'content': 0.007065530400723219, 'timestamp': '2025-09-10 02:39:09.180627', 'step': 1532, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 480], 'flops': 9600058345344.0}, 'timestamp': '2025-09-10 02:39:09.252326', 'step': 1532, 'epoch': 1} +{'type': 'loss', 'content': 0.01046669390052557, 'timestamp': '2025-09-10 02:39:09.267266', 'step': 1533, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:39:09.320408', 'step': 1533, 'epoch': 1} +{'type': 'loss', 'content': 0.01111555565148592, 'timestamp': '2025-09-10 02:39:09.326936', 'step': 1534, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:39:09.380736', 'step': 1534, 'epoch': 1} +{'type': 'loss', 'content': 0.008722702972590923, 'timestamp': '2025-09-10 02:39:09.382947', 'step': 1535, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:39:09.435352', 'step': 1535, 'epoch': 1} +{'type': 'loss', 'content': 0.06579513102769852, 'timestamp': '2025-09-10 02:39:09.441404', 'step': 1536, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:39:09.493662', 'step': 1536, 'epoch': 1} +{'type': 'loss', 'content': 0.0012011303333565593, 'timestamp': '2025-09-10 02:39:09.496198', 'step': 1537, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:39:09.550039', 'step': 1537, 'epoch': 1} +{'type': 'loss', 'content': 0.02228461392223835, 'timestamp': '2025-09-10 02:39:09.552321', 'step': 1538, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:39:09.605023', 'step': 1538, 'epoch': 1} +{'type': 'loss', 'content': 0.008895862847566605, 'timestamp': '2025-09-10 02:39:09.611537', 'step': 1539, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:39:09.664944', 'step': 1539, 'epoch': 1} +{'type': 'loss', 'content': 0.022968510165810585, 'timestamp': '2025-09-10 02:39:09.670748', 'step': 1540, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:39:09.726879', 'step': 1540, 'epoch': 1} +{'type': 'loss', 'content': 0.0167404692620039, 'timestamp': '2025-09-10 02:39:09.729142', 'step': 1541, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:39:09.782058', 'step': 1541, 'epoch': 1} +{'type': 'loss', 'content': 0.039507247507572174, 'timestamp': '2025-09-10 02:39:09.790012', 'step': 1542, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:39:09.842983', 'step': 1542, 'epoch': 1} +{'type': 'loss', 'content': 0.002729188185185194, 'timestamp': '2025-09-10 02:39:09.845109', 'step': 1543, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:39:09.898474', 'step': 1543, 'epoch': 1} +{'type': 'loss', 'content': 0.0065974099561572075, 'timestamp': '2025-09-10 02:39:09.905776', 'step': 1544, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:39:09.964604', 'step': 1544, 'epoch': 1} +{'type': 'loss', 'content': 0.0029151104390621185, 'timestamp': '2025-09-10 02:39:09.976127', 'step': 1545, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:39:10.029686', 'step': 1545, 'epoch': 1} +{'type': 'loss', 'content': 0.009200065396726131, 'timestamp': '2025-09-10 02:39:10.031840', 'step': 1546, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:39:10.085454', 'step': 1546, 'epoch': 1} +{'type': 'loss', 'content': 0.022537946701049805, 'timestamp': '2025-09-10 02:39:10.087666', 'step': 1547, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:39:10.140388', 'step': 1547, 'epoch': 1} +{'type': 'loss', 'content': 0.033098287880420685, 'timestamp': '2025-09-10 02:39:10.147656', 'step': 1548, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:39:10.200256', 'step': 1548, 'epoch': 1} +{'type': 'loss', 'content': 0.005940968636423349, 'timestamp': '2025-09-10 02:39:10.203017', 'step': 1549, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:39:10.256150', 'step': 1549, 'epoch': 1} +{'type': 'loss', 'content': 0.009414262138307095, 'timestamp': '2025-09-10 02:39:10.258425', 'step': 1550, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:39:10.311554', 'step': 1550, 'epoch': 1} +{'type': 'loss', 'content': 0.006693967618048191, 'timestamp': '2025-09-10 02:39:10.319683', 'step': 1551, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:39:10.372536', 'step': 1551, 'epoch': 1} +{'type': 'loss', 'content': 0.012858742848038673, 'timestamp': '2025-09-10 02:39:10.378316', 'step': 1552, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:39:10.430599', 'step': 1552, 'epoch': 1} +{'type': 'loss', 'content': 0.025713259354233742, 'timestamp': '2025-09-10 02:39:10.432831', 'step': 1553, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:39:10.486192', 'step': 1553, 'epoch': 1} +{'type': 'loss', 'content': 0.004746002610772848, 'timestamp': '2025-09-10 02:39:10.488545', 'step': 1554, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:39:10.542130', 'step': 1554, 'epoch': 1} +{'type': 'loss', 'content': 0.03677273541688919, 'timestamp': '2025-09-10 02:39:10.544321', 'step': 1555, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:39:10.599015', 'step': 1555, 'epoch': 1} +{'type': 'loss', 'content': 0.001012719003483653, 'timestamp': '2025-09-10 02:39:10.609619', 'step': 1556, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:39:10.661416', 'step': 1556, 'epoch': 1} +{'type': 'loss', 'content': 0.012111333198845387, 'timestamp': '2025-09-10 02:39:10.664596', 'step': 1557, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:39:10.718119', 'step': 1557, 'epoch': 1} +{'type': 'loss', 'content': 0.029442133381962776, 'timestamp': '2025-09-10 02:39:10.720277', 'step': 1558, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:39:10.773200', 'step': 1558, 'epoch': 1} +{'type': 'loss', 'content': 0.04144950583577156, 'timestamp': '2025-09-10 02:39:10.775539', 'step': 1559, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:39:10.827983', 'step': 1559, 'epoch': 1} +{'type': 'loss', 'content': 0.002831295132637024, 'timestamp': '2025-09-10 02:39:10.833746', 'step': 1560, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:39:10.886531', 'step': 1560, 'epoch': 1} +{'type': 'loss', 'content': 0.00786892231553793, 'timestamp': '2025-09-10 02:39:10.896155', 'step': 1561, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:39:10.950935', 'step': 1561, 'epoch': 1} +{'type': 'loss', 'content': 0.005282655358314514, 'timestamp': '2025-09-10 02:39:10.960731', 'step': 1562, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:39:11.016101', 'step': 1562, 'epoch': 1} +{'type': 'loss', 'content': 0.00401962548494339, 'timestamp': '2025-09-10 02:39:11.018230', 'step': 1563, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:39:11.071569', 'step': 1563, 'epoch': 1} +{'type': 'loss', 'content': 0.02013194002211094, 'timestamp': '2025-09-10 02:39:11.077338', 'step': 1564, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:39:11.134995', 'step': 1564, 'epoch': 1} +{'type': 'loss', 'content': 0.0010756131960079074, 'timestamp': '2025-09-10 02:39:11.137022', 'step': 1565, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:39:11.190643', 'step': 1565, 'epoch': 1} +{'type': 'loss', 'content': 0.02974933385848999, 'timestamp': '2025-09-10 02:39:11.192905', 'step': 1566, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:39:11.245903', 'step': 1566, 'epoch': 1} +{'type': 'loss', 'content': 0.027059292420744896, 'timestamp': '2025-09-10 02:39:11.253991', 'step': 1567, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:39:11.306693', 'step': 1567, 'epoch': 1} +{'type': 'loss', 'content': 0.018480705097317696, 'timestamp': '2025-09-10 02:39:11.314956', 'step': 1568, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:39:11.376439', 'step': 1568, 'epoch': 1} +{'type': 'loss', 'content': 0.009361452423036098, 'timestamp': '2025-09-10 02:39:11.387682', 'step': 1569, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:39:11.444613', 'step': 1569, 'epoch': 1} +{'type': 'loss', 'content': 0.005706433672457933, 'timestamp': '2025-09-10 02:39:11.446796', 'step': 1570, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:39:11.499847', 'step': 1570, 'epoch': 1} +{'type': 'loss', 'content': 0.03452344983816147, 'timestamp': '2025-09-10 02:39:11.502757', 'step': 1571, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:39:11.563123', 'step': 1571, 'epoch': 1} +{'type': 'loss', 'content': 0.003737966064363718, 'timestamp': '2025-09-10 02:39:11.574681', 'step': 1572, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:39:11.631549', 'step': 1572, 'epoch': 1} +{'type': 'loss', 'content': 0.029324105009436607, 'timestamp': '2025-09-10 02:39:11.641739', 'step': 1573, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:39:11.694550', 'step': 1573, 'epoch': 1} +{'type': 'loss', 'content': 0.015068896114826202, 'timestamp': '2025-09-10 02:39:11.696718', 'step': 1574, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:39:11.750302', 'step': 1574, 'epoch': 1} +{'type': 'loss', 'content': 0.01612308993935585, 'timestamp': '2025-09-10 02:39:11.752451', 'step': 1575, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:39:11.809118', 'step': 1575, 'epoch': 1} +{'type': 'loss', 'content': 0.01985299587249756, 'timestamp': '2025-09-10 02:39:11.814821', 'step': 1576, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:39:11.867681', 'step': 1576, 'epoch': 1} +{'type': 'loss', 'content': 0.04363846778869629, 'timestamp': '2025-09-10 02:39:11.869986', 'step': 1577, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:39:11.923163', 'step': 1577, 'epoch': 1} +{'type': 'loss', 'content': 0.008180653676390648, 'timestamp': '2025-09-10 02:39:11.925313', 'step': 1578, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:39:11.978005', 'step': 1578, 'epoch': 1} +{'type': 'loss', 'content': 0.004834081511944532, 'timestamp': '2025-09-10 02:39:11.980087', 'step': 1579, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:39:12.033006', 'step': 1579, 'epoch': 1} +{'type': 'loss', 'content': 0.01860627345740795, 'timestamp': '2025-09-10 02:39:12.039060', 'step': 1580, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:39:12.091821', 'step': 1580, 'epoch': 1} +{'type': 'loss', 'content': 0.015397860668599606, 'timestamp': '2025-09-10 02:39:12.094212', 'step': 1581, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:39:12.147194', 'step': 1581, 'epoch': 1} +{'type': 'loss', 'content': 0.002805066527798772, 'timestamp': '2025-09-10 02:39:12.149379', 'step': 1582, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:39:12.203446', 'step': 1582, 'epoch': 1} +{'type': 'loss', 'content': 0.05582290142774582, 'timestamp': '2025-09-10 02:39:12.213053', 'step': 1583, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:39:12.265965', 'step': 1583, 'epoch': 1} +{'type': 'loss', 'content': 0.009821859188377857, 'timestamp': '2025-09-10 02:39:12.271530', 'step': 1584, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:39:12.323768', 'step': 1584, 'epoch': 1} +{'type': 'loss', 'content': 0.027155477553606033, 'timestamp': '2025-09-10 02:39:12.332182', 'step': 1585, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:39:12.386791', 'step': 1585, 'epoch': 1} +{'type': 'loss', 'content': 0.007664863485842943, 'timestamp': '2025-09-10 02:39:12.396561', 'step': 1586, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:39:12.449924', 'step': 1586, 'epoch': 1} +{'type': 'loss', 'content': 0.007052138447761536, 'timestamp': '2025-09-10 02:39:12.452136', 'step': 1587, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:39:12.505073', 'step': 1587, 'epoch': 1} +{'type': 'loss', 'content': 0.010261270217597485, 'timestamp': '2025-09-10 02:39:12.512315', 'step': 1588, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:39:12.564950', 'step': 1588, 'epoch': 1} +{'type': 'loss', 'content': 0.0011753434082493186, 'timestamp': '2025-09-10 02:39:12.567149', 'step': 1589, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:39:12.620040', 'step': 1589, 'epoch': 1} +{'type': 'loss', 'content': 0.005562249571084976, 'timestamp': '2025-09-10 02:39:12.628155', 'step': 1590, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:39:12.681399', 'step': 1590, 'epoch': 1} +{'type': 'loss', 'content': 0.0065660723485052586, 'timestamp': '2025-09-10 02:39:12.688043', 'step': 1591, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:39:12.741044', 'step': 1591, 'epoch': 1} +{'type': 'loss', 'content': 0.018280507996678352, 'timestamp': '2025-09-10 02:39:12.746801', 'step': 1592, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:12.798463', 'step': 1592, 'epoch': 1} +{'type': 'loss', 'content': 0.006899405736476183, 'timestamp': '2025-09-10 02:39:12.800420', 'step': 1593, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:39:12.853795', 'step': 1593, 'epoch': 1} +{'type': 'loss', 'content': 0.0059174662455916405, 'timestamp': '2025-09-10 02:39:12.863383', 'step': 1594, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:39:12.917096', 'step': 1594, 'epoch': 1} +{'type': 'loss', 'content': 0.02331809140741825, 'timestamp': '2025-09-10 02:39:12.919352', 'step': 1595, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:39:12.972745', 'step': 1595, 'epoch': 1} +{'type': 'loss', 'content': 0.017544277012348175, 'timestamp': '2025-09-10 02:39:12.978645', 'step': 1596, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:39:13.031055', 'step': 1596, 'epoch': 1} +{'type': 'loss', 'content': 0.005049147177487612, 'timestamp': '2025-09-10 02:39:13.037712', 'step': 1597, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:39:13.091054', 'step': 1597, 'epoch': 1} +{'type': 'loss', 'content': 0.0027481592260301113, 'timestamp': '2025-09-10 02:39:13.097713', 'step': 1598, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:39:13.154354', 'step': 1598, 'epoch': 1} +{'type': 'loss', 'content': 0.003927871584892273, 'timestamp': '2025-09-10 02:39:13.162979', 'step': 1599, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:39:13.229797', 'step': 1599, 'epoch': 1} +{'type': 'loss', 'content': 0.0011780605418607593, 'timestamp': '2025-09-10 02:39:13.241004', 'step': 1600, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:39:13.297617', 'step': 1600, 'epoch': 1} +{'type': 'loss', 'content': 0.03326495364308357, 'timestamp': '2025-09-10 02:39:13.299841', 'step': 1601, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:39:13.357439', 'step': 1601, 'epoch': 1} +{'type': 'loss', 'content': 0.029503485187888145, 'timestamp': '2025-09-10 02:39:13.367068', 'step': 1602, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:39:13.423063', 'step': 1602, 'epoch': 1} +{'type': 'loss', 'content': 0.015550837852060795, 'timestamp': '2025-09-10 02:39:13.426067', 'step': 1603, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:39:13.481074', 'step': 1603, 'epoch': 1} +{'type': 'loss', 'content': 0.008897035382688046, 'timestamp': '2025-09-10 02:39:13.491652', 'step': 1604, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:39:13.543628', 'step': 1604, 'epoch': 1} +{'type': 'loss', 'content': 0.0055795880034565926, 'timestamp': '2025-09-10 02:39:13.546256', 'step': 1605, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:39:13.599530', 'step': 1605, 'epoch': 1} +{'type': 'loss', 'content': 0.014626068994402885, 'timestamp': '2025-09-10 02:39:13.601798', 'step': 1606, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:39:13.654906', 'step': 1606, 'epoch': 1} +{'type': 'loss', 'content': 0.02112056128680706, 'timestamp': '2025-09-10 02:39:13.657072', 'step': 1607, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:39:13.710612', 'step': 1607, 'epoch': 1} +{'type': 'loss', 'content': 0.021492617204785347, 'timestamp': '2025-09-10 02:39:13.716397', 'step': 1608, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:39:13.770886', 'step': 1608, 'epoch': 1} +{'type': 'loss', 'content': 0.008916768245398998, 'timestamp': '2025-09-10 02:39:13.773276', 'step': 1609, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:39:13.826737', 'step': 1609, 'epoch': 1} +{'type': 'loss', 'content': 0.027872784063220024, 'timestamp': '2025-09-10 02:39:13.831432', 'step': 1610, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:39:13.885020', 'step': 1610, 'epoch': 1} +{'type': 'loss', 'content': 0.009076380170881748, 'timestamp': '2025-09-10 02:39:13.887311', 'step': 1611, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:39:13.942930', 'step': 1611, 'epoch': 1} +{'type': 'loss', 'content': 0.010817697271704674, 'timestamp': '2025-09-10 02:39:13.948775', 'step': 1612, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:39:14.007650', 'step': 1612, 'epoch': 1} +{'type': 'loss', 'content': 0.01146281324326992, 'timestamp': '2025-09-10 02:39:14.019162', 'step': 1613, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-09-10 02:39:14.087202', 'step': 1613, 'epoch': 1} +{'type': 'loss', 'content': 0.015638425946235657, 'timestamp': '2025-09-10 02:39:14.099801', 'step': 1614, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:39:14.153409', 'step': 1614, 'epoch': 1} +{'type': 'loss', 'content': 0.029857028275728226, 'timestamp': '2025-09-10 02:39:14.155796', 'step': 1615, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:39:14.209191', 'step': 1615, 'epoch': 1} +{'type': 'loss', 'content': 0.045049529522657394, 'timestamp': '2025-09-10 02:39:14.216567', 'step': 1616, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:39:14.272337', 'step': 1616, 'epoch': 1} +{'type': 'loss', 'content': 0.021982843056321144, 'timestamp': '2025-09-10 02:39:14.274565', 'step': 1617, 'epoch': 1} +{'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 736], 'batch_size': 8, 'flops': 14691612894976}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10859018244352}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 624], 'batch_size': 8, 'flops': 12455932682112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11497784019456}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8623338031488}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8623338031488}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10859018244352}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 656], 'batch_size': 8, 'flops': 13094698457216}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 560], 'batch_size': 8, 'flops': 11178401131904}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11497784019456}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 1168], 'batch_size': 8, 'flops': 23314950858880}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 640], 'batch_size': 8, 'flops': 12775315569664}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [6, 208], 'batch_size': 8, 'flops': 4151977605760}], 'timestamp': '2025-09-10 02:39:31.144710', 'step': 1617, 'epoch': 1} +{'type': 'pplx', 'content': 20237851.867195703, 'timestamp': '2025-09-10 02:39:31.147543', 'step': 1617, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:39:31.202665', 'step': 1617, 'epoch': 1} +{'type': 'loss', 'content': 0.004158311523497105, 'timestamp': '2025-09-10 02:39:31.204597', 'step': 1618, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:39:31.259206', 'step': 1618, 'epoch': 1} +{'type': 'loss', 'content': 0.0025820934679359198, 'timestamp': '2025-09-10 02:39:31.268231', 'step': 1619, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:39:31.322087', 'step': 1619, 'epoch': 1} +{'type': 'loss', 'content': 0.013205154798924923, 'timestamp': '2025-09-10 02:39:31.328213', 'step': 1620, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:39:31.380908', 'step': 1620, 'epoch': 1} +{'type': 'loss', 'content': 0.039854079484939575, 'timestamp': '2025-09-10 02:39:31.383660', 'step': 1621, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:39:31.439115', 'step': 1621, 'epoch': 1} +{'type': 'loss', 'content': 0.03273060545325279, 'timestamp': '2025-09-10 02:39:31.448953', 'step': 1622, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:39:31.501939', 'step': 1622, 'epoch': 1} +{'type': 'loss', 'content': 0.0293898768723011, 'timestamp': '2025-09-10 02:39:31.504850', 'step': 1623, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:39:31.561506', 'step': 1623, 'epoch': 1} +{'type': 'loss', 'content': 0.003011903027072549, 'timestamp': '2025-09-10 02:39:31.568742', 'step': 1624, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:39:31.621285', 'step': 1624, 'epoch': 1} +{'type': 'loss', 'content': 0.020315011963248253, 'timestamp': '2025-09-10 02:39:31.623264', 'step': 1625, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:39:31.676975', 'step': 1625, 'epoch': 1} +{'type': 'loss', 'content': 0.02353397198021412, 'timestamp': '2025-09-10 02:39:31.686559', 'step': 1626, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:39:31.739782', 'step': 1626, 'epoch': 1} +{'type': 'loss', 'content': 0.008481854572892189, 'timestamp': '2025-09-10 02:39:31.741705', 'step': 1627, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:39:31.794500', 'step': 1627, 'epoch': 1} +{'type': 'loss', 'content': 0.01967434026300907, 'timestamp': '2025-09-10 02:39:31.800331', 'step': 1628, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:39:31.853077', 'step': 1628, 'epoch': 1} +{'type': 'loss', 'content': 0.008929950185120106, 'timestamp': '2025-09-10 02:39:31.854901', 'step': 1629, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:39:31.907662', 'step': 1629, 'epoch': 1} +{'type': 'loss', 'content': 0.00881776213645935, 'timestamp': '2025-09-10 02:39:31.914379', 'step': 1630, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:39:31.967709', 'step': 1630, 'epoch': 1} +{'type': 'loss', 'content': 0.025654584169387817, 'timestamp': '2025-09-10 02:39:31.969708', 'step': 1631, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:39:32.022704', 'step': 1631, 'epoch': 1} +{'type': 'loss', 'content': 0.016202174127101898, 'timestamp': '2025-09-10 02:39:32.030041', 'step': 1632, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:39:32.082321', 'step': 1632, 'epoch': 1} +{'type': 'loss', 'content': 0.02773822471499443, 'timestamp': '2025-09-10 02:39:32.084353', 'step': 1633, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:39:32.137557', 'step': 1633, 'epoch': 1} +{'type': 'loss', 'content': 0.011530165560543537, 'timestamp': '2025-09-10 02:39:32.143957', 'step': 1634, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:39:32.197255', 'step': 1634, 'epoch': 1} +{'type': 'loss', 'content': 0.011132647283375263, 'timestamp': '2025-09-10 02:39:32.199367', 'step': 1635, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:39:32.252149', 'step': 1635, 'epoch': 1} +{'type': 'loss', 'content': 0.0033992708195000887, 'timestamp': '2025-09-10 02:39:32.257880', 'step': 1636, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:39:32.309887', 'step': 1636, 'epoch': 1} +{'type': 'loss', 'content': 0.009040266275405884, 'timestamp': '2025-09-10 02:39:32.312763', 'step': 1637, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:39:32.365504', 'step': 1637, 'epoch': 1} +{'type': 'loss', 'content': 0.00423599686473608, 'timestamp': '2025-09-10 02:39:32.367561', 'step': 1638, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:39:32.420192', 'step': 1638, 'epoch': 1} +{'type': 'loss', 'content': 0.017923900857567787, 'timestamp': '2025-09-10 02:39:32.422295', 'step': 1639, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:39:32.475045', 'step': 1639, 'epoch': 1} +{'type': 'loss', 'content': 0.008748088032007217, 'timestamp': '2025-09-10 02:39:32.480658', 'step': 1640, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:39:32.533070', 'step': 1640, 'epoch': 1} +{'type': 'loss', 'content': 0.02162143588066101, 'timestamp': '2025-09-10 02:39:32.539741', 'step': 1641, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:39:32.593385', 'step': 1641, 'epoch': 1} +{'type': 'loss', 'content': 0.005233997944742441, 'timestamp': '2025-09-10 02:39:32.595508', 'step': 1642, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:39:32.649292', 'step': 1642, 'epoch': 1} +{'type': 'loss', 'content': 0.009843496605753899, 'timestamp': '2025-09-10 02:39:32.658915', 'step': 1643, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:39:32.712233', 'step': 1643, 'epoch': 1} +{'type': 'loss', 'content': 0.004377818200737238, 'timestamp': '2025-09-10 02:39:32.718032', 'step': 1644, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:39:32.770683', 'step': 1644, 'epoch': 1} +{'type': 'loss', 'content': 0.011794110760092735, 'timestamp': '2025-09-10 02:39:32.772818', 'step': 1645, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:39:32.825302', 'step': 1645, 'epoch': 1} +{'type': 'loss', 'content': 0.021618753671646118, 'timestamp': '2025-09-10 02:39:32.827345', 'step': 1646, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:39:32.880206', 'step': 1646, 'epoch': 1} +{'type': 'loss', 'content': 0.005219961516559124, 'timestamp': '2025-09-10 02:39:32.882218', 'step': 1647, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:39:32.942213', 'step': 1647, 'epoch': 1} +{'type': 'loss', 'content': 0.0140523137524724, 'timestamp': '2025-09-10 02:39:32.953728', 'step': 1648, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:39:33.006353', 'step': 1648, 'epoch': 1} +{'type': 'loss', 'content': 0.028450939804315567, 'timestamp': '2025-09-10 02:39:33.008425', 'step': 1649, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:39:33.061216', 'step': 1649, 'epoch': 1} +{'type': 'loss', 'content': 0.019210809841752052, 'timestamp': '2025-09-10 02:39:33.064343', 'step': 1650, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:39:33.117667', 'step': 1650, 'epoch': 1} +{'type': 'loss', 'content': 0.025495637208223343, 'timestamp': '2025-09-10 02:39:33.119869', 'step': 1651, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:39:33.172776', 'step': 1651, 'epoch': 1} +{'type': 'loss', 'content': 0.02168445847928524, 'timestamp': '2025-09-10 02:39:33.178488', 'step': 1652, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-09-10 02:39:33.242840', 'step': 1652, 'epoch': 1} +{'type': 'loss', 'content': 0.017340829595923424, 'timestamp': '2025-09-10 02:39:33.256189', 'step': 1653, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:39:33.309346', 'step': 1653, 'epoch': 1} +{'type': 'loss', 'content': 0.017994971945881844, 'timestamp': '2025-09-10 02:39:33.311417', 'step': 1654, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:39:33.364962', 'step': 1654, 'epoch': 1} +{'type': 'loss', 'content': 0.018361778929829597, 'timestamp': '2025-09-10 02:39:33.367023', 'step': 1655, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:39:33.419925', 'step': 1655, 'epoch': 1} +{'type': 'loss', 'content': 0.0625004991889, 'timestamp': '2025-09-10 02:39:33.425552', 'step': 1656, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-09-10 02:39:33.490265', 'step': 1656, 'epoch': 1} +{'type': 'loss', 'content': 0.032270364463329315, 'timestamp': '2025-09-10 02:39:33.503640', 'step': 1657, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 464], 'flops': 9280056402752.0}, 'timestamp': '2025-09-10 02:39:33.576738', 'step': 1657, 'epoch': 1} +{'type': 'loss', 'content': 0.020126575604081154, 'timestamp': '2025-09-10 02:39:33.590372', 'step': 1658, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:39:33.643712', 'step': 1658, 'epoch': 1} +{'type': 'loss', 'content': 0.012262661941349506, 'timestamp': '2025-09-10 02:39:33.645935', 'step': 1659, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:39:33.698571', 'step': 1659, 'epoch': 1} +{'type': 'loss', 'content': 0.005183726083487272, 'timestamp': '2025-09-10 02:39:33.704191', 'step': 1660, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:39:33.757428', 'step': 1660, 'epoch': 1} +{'type': 'loss', 'content': 0.019909026101231575, 'timestamp': '2025-09-10 02:39:33.768057', 'step': 1661, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:39:33.822888', 'step': 1661, 'epoch': 1} +{'type': 'loss', 'content': 0.008784397505223751, 'timestamp': '2025-09-10 02:39:33.832847', 'step': 1662, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 496], 'flops': 9920060287936.0}, 'timestamp': '2025-09-10 02:39:33.907307', 'step': 1662, 'epoch': 1} +{'type': 'loss', 'content': 0.04029487445950508, 'timestamp': '2025-09-10 02:39:33.921417', 'step': 1663, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:39:33.975507', 'step': 1663, 'epoch': 1} +{'type': 'loss', 'content': 0.010022074915468693, 'timestamp': '2025-09-10 02:39:33.986207', 'step': 1664, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:39:34.038418', 'step': 1664, 'epoch': 1} +{'type': 'loss', 'content': 0.007610289845615625, 'timestamp': '2025-09-10 02:39:34.041879', 'step': 1665, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:39:34.095521', 'step': 1665, 'epoch': 1} +{'type': 'loss', 'content': 0.026863878592848778, 'timestamp': '2025-09-10 02:39:34.097600', 'step': 1666, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:39:34.150305', 'step': 1666, 'epoch': 1} +{'type': 'loss', 'content': 0.03311929479241371, 'timestamp': '2025-09-10 02:39:34.152318', 'step': 1667, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:39:34.205870', 'step': 1667, 'epoch': 1} +{'type': 'loss', 'content': 0.0065115662291646, 'timestamp': '2025-09-10 02:39:34.216392', 'step': 1668, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:39:34.269169', 'step': 1668, 'epoch': 1} +{'type': 'loss', 'content': 0.008997505530714989, 'timestamp': '2025-09-10 02:39:34.277411', 'step': 1669, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:39:34.331262', 'step': 1669, 'epoch': 1} +{'type': 'loss', 'content': 0.044347260147333145, 'timestamp': '2025-09-10 02:39:34.333373', 'step': 1670, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:39:34.386369', 'step': 1670, 'epoch': 1} +{'type': 'loss', 'content': 0.011004636995494366, 'timestamp': '2025-09-10 02:39:34.388562', 'step': 1671, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:39:34.441406', 'step': 1671, 'epoch': 1} +{'type': 'loss', 'content': 0.0005539219710044563, 'timestamp': '2025-09-10 02:39:34.447107', 'step': 1672, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:39:34.500026', 'step': 1672, 'epoch': 1} +{'type': 'loss', 'content': 0.012371420860290527, 'timestamp': '2025-09-10 02:39:34.501973', 'step': 1673, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:39:34.554456', 'step': 1673, 'epoch': 1} +{'type': 'loss', 'content': 0.0060769012197852135, 'timestamp': '2025-09-10 02:39:34.561210', 'step': 1674, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:39:34.613987', 'step': 1674, 'epoch': 1} +{'type': 'loss', 'content': 0.02393592707812786, 'timestamp': '2025-09-10 02:39:34.616382', 'step': 1675, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:39:34.670850', 'step': 1675, 'epoch': 1} +{'type': 'loss', 'content': 0.02526310458779335, 'timestamp': '2025-09-10 02:39:34.681391', 'step': 1676, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:39:34.733577', 'step': 1676, 'epoch': 1} +{'type': 'loss', 'content': 0.019062532112002373, 'timestamp': '2025-09-10 02:39:34.735779', 'step': 1677, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:39:34.788288', 'step': 1677, 'epoch': 1} +{'type': 'loss', 'content': 0.002217465080320835, 'timestamp': '2025-09-10 02:39:34.790501', 'step': 1678, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:39:34.843711', 'step': 1678, 'epoch': 1} +{'type': 'loss', 'content': 0.01228619460016489, 'timestamp': '2025-09-10 02:39:34.852047', 'step': 1679, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-09-10 02:39:34.921439', 'step': 1679, 'epoch': 1} +{'type': 'loss', 'content': 0.011879702098667622, 'timestamp': '2025-09-10 02:39:34.935087', 'step': 1680, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:39:34.989851', 'step': 1680, 'epoch': 1} +{'type': 'loss', 'content': 0.005328523926436901, 'timestamp': '2025-09-10 02:39:34.993376', 'step': 1681, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-09-10 02:39:35.056976', 'step': 1681, 'epoch': 1} +{'type': 'loss', 'content': 0.009572336450219154, 'timestamp': '2025-09-10 02:39:35.068201', 'step': 1682, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:39:35.121073', 'step': 1682, 'epoch': 1} +{'type': 'loss', 'content': 0.010687570087611675, 'timestamp': '2025-09-10 02:39:35.123262', 'step': 1683, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-09-10 02:39:35.184615', 'step': 1683, 'epoch': 1} +{'type': 'loss', 'content': 0.009572351351380348, 'timestamp': '2025-09-10 02:39:35.196463', 'step': 1684, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:39:35.248934', 'step': 1684, 'epoch': 1} +{'type': 'loss', 'content': 0.015068850480020046, 'timestamp': '2025-09-10 02:39:35.250945', 'step': 1685, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:39:35.303915', 'step': 1685, 'epoch': 1} +{'type': 'loss', 'content': 0.009619450196623802, 'timestamp': '2025-09-10 02:39:35.305878', 'step': 1686, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:39:35.358821', 'step': 1686, 'epoch': 1} +{'type': 'loss', 'content': 0.0017892051255330443, 'timestamp': '2025-09-10 02:39:35.360992', 'step': 1687, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:39:35.418516', 'step': 1687, 'epoch': 1} +{'type': 'loss', 'content': 0.019350871443748474, 'timestamp': '2025-09-10 02:39:35.429844', 'step': 1688, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:39:35.482007', 'step': 1688, 'epoch': 1} +{'type': 'loss', 'content': 0.015114265494048595, 'timestamp': '2025-09-10 02:39:35.492291', 'step': 1689, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:39:35.545049', 'step': 1689, 'epoch': 1} +{'type': 'loss', 'content': 0.004630051087588072, 'timestamp': '2025-09-10 02:39:35.551670', 'step': 1690, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:39:35.604738', 'step': 1690, 'epoch': 1} +{'type': 'loss', 'content': 0.012441044673323631, 'timestamp': '2025-09-10 02:39:35.606871', 'step': 1691, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:39:35.659706', 'step': 1691, 'epoch': 1} +{'type': 'loss', 'content': 0.029850082471966743, 'timestamp': '2025-09-10 02:39:35.665515', 'step': 1692, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:39:35.717869', 'step': 1692, 'epoch': 1} +{'type': 'loss', 'content': 0.008027377538383007, 'timestamp': '2025-09-10 02:39:35.720100', 'step': 1693, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:39:35.772875', 'step': 1693, 'epoch': 1} +{'type': 'loss', 'content': 0.02368379756808281, 'timestamp': '2025-09-10 02:39:35.775017', 'step': 1694, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:39:35.827947', 'step': 1694, 'epoch': 1} +{'type': 'loss', 'content': 0.0030561552848666906, 'timestamp': '2025-09-10 02:39:35.835918', 'step': 1695, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:39:35.889956', 'step': 1695, 'epoch': 1} +{'type': 'loss', 'content': 0.0022136729676276445, 'timestamp': '2025-09-10 02:39:35.896706', 'step': 1696, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:39:35.949631', 'step': 1696, 'epoch': 1} +{'type': 'loss', 'content': 0.012324579060077667, 'timestamp': '2025-09-10 02:39:35.959894', 'step': 1697, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:39:36.014285', 'step': 1697, 'epoch': 1} +{'type': 'loss', 'content': 0.012455495074391365, 'timestamp': '2025-09-10 02:39:36.016765', 'step': 1698, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:39:36.077896', 'step': 1698, 'epoch': 1} +{'type': 'loss', 'content': 0.012829869985580444, 'timestamp': '2025-09-10 02:39:36.088611', 'step': 1699, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-09-10 02:39:36.157442', 'step': 1699, 'epoch': 1} +{'type': 'loss', 'content': 0.013320697471499443, 'timestamp': '2025-09-10 02:39:36.170850', 'step': 1700, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:39:36.225323', 'step': 1700, 'epoch': 1} +{'type': 'loss', 'content': 0.00998709350824356, 'timestamp': '2025-09-10 02:39:36.227706', 'step': 1701, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:39:36.281778', 'step': 1701, 'epoch': 1} +{'type': 'loss', 'content': 0.019189154729247093, 'timestamp': '2025-09-10 02:39:36.284144', 'step': 1702, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:39:36.342815', 'step': 1702, 'epoch': 1} +{'type': 'loss', 'content': 0.024400973692536354, 'timestamp': '2025-09-10 02:39:36.353247', 'step': 1703, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:39:36.408251', 'step': 1703, 'epoch': 1} +{'type': 'loss', 'content': 0.019246473908424377, 'timestamp': '2025-09-10 02:39:36.414245', 'step': 1704, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:39:36.467729', 'step': 1704, 'epoch': 1} +{'type': 'loss', 'content': 0.012863265350461006, 'timestamp': '2025-09-10 02:39:36.473639', 'step': 1705, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:39:36.527103', 'step': 1705, 'epoch': 1} +{'type': 'loss', 'content': 0.016453877091407776, 'timestamp': '2025-09-10 02:39:36.529161', 'step': 1706, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:39:36.581847', 'step': 1706, 'epoch': 1} +{'type': 'loss', 'content': 0.020454296842217445, 'timestamp': '2025-09-10 02:39:36.584054', 'step': 1707, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:39:36.638675', 'step': 1707, 'epoch': 1} +{'type': 'loss', 'content': 0.009613310918211937, 'timestamp': '2025-09-10 02:39:36.649233', 'step': 1708, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:39:36.703466', 'step': 1708, 'epoch': 1} +{'type': 'loss', 'content': 0.005098339635878801, 'timestamp': '2025-09-10 02:39:36.705472', 'step': 1709, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:39:36.759205', 'step': 1709, 'epoch': 1} +{'type': 'loss', 'content': 0.02107381820678711, 'timestamp': '2025-09-10 02:39:36.765243', 'step': 1710, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:39:36.820206', 'step': 1710, 'epoch': 1} +{'type': 'loss', 'content': 0.049266356974840164, 'timestamp': '2025-09-10 02:39:36.822400', 'step': 1711, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:39:36.875529', 'step': 1711, 'epoch': 1} +{'type': 'loss', 'content': 0.019013753160834312, 'timestamp': '2025-09-10 02:39:36.882011', 'step': 1712, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:39:36.941354', 'step': 1712, 'epoch': 1} +{'type': 'loss', 'content': 0.01836700178682804, 'timestamp': '2025-09-10 02:39:36.952867', 'step': 1713, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:39:37.006906', 'step': 1713, 'epoch': 1} +{'type': 'loss', 'content': 0.016758916899561882, 'timestamp': '2025-09-10 02:39:37.008907', 'step': 1714, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:39:37.062799', 'step': 1714, 'epoch': 1} +{'type': 'loss', 'content': 0.005026537459343672, 'timestamp': '2025-09-10 02:39:37.065149', 'step': 1715, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:39:37.118921', 'step': 1715, 'epoch': 1} +{'type': 'loss', 'content': 0.014079469256103039, 'timestamp': '2025-09-10 02:39:37.125540', 'step': 1716, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:39:37.177893', 'step': 1716, 'epoch': 1} +{'type': 'loss', 'content': 0.007057299371808767, 'timestamp': '2025-09-10 02:39:37.180205', 'step': 1717, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 528], 'flops': 10560064173120.0}, 'timestamp': '2025-09-10 02:39:37.260507', 'step': 1717, 'epoch': 1} +{'type': 'loss', 'content': 0.008493524976074696, 'timestamp': '2025-09-10 02:39:37.275566', 'step': 1718, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:39:37.329096', 'step': 1718, 'epoch': 1} +{'type': 'loss', 'content': 0.012126578018069267, 'timestamp': '2025-09-10 02:39:37.331508', 'step': 1719, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:39:37.388093', 'step': 1719, 'epoch': 1} +{'type': 'loss', 'content': 0.007698687259107828, 'timestamp': '2025-09-10 02:39:37.397870', 'step': 1720, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:37.451161', 'step': 1720, 'epoch': 1} +{'type': 'loss', 'content': 0.017780693247914314, 'timestamp': '2025-09-10 02:39:37.453186', 'step': 1721, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:39:37.507210', 'step': 1721, 'epoch': 1} +{'type': 'loss', 'content': 0.0079504968598485, 'timestamp': '2025-09-10 02:39:37.514984', 'step': 1722, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:39:37.568490', 'step': 1722, 'epoch': 1} +{'type': 'loss', 'content': 0.020733432844281197, 'timestamp': '2025-09-10 02:39:37.570888', 'step': 1723, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-09-10 02:39:37.632456', 'step': 1723, 'epoch': 1} +{'type': 'loss', 'content': 0.014720803126692772, 'timestamp': '2025-09-10 02:39:37.644327', 'step': 1724, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:39:37.696949', 'step': 1724, 'epoch': 1} +{'type': 'loss', 'content': 0.011866986751556396, 'timestamp': '2025-09-10 02:39:37.703558', 'step': 1725, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:39:37.758577', 'step': 1725, 'epoch': 1} +{'type': 'loss', 'content': 0.010004202835261822, 'timestamp': '2025-09-10 02:39:37.761446', 'step': 1726, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:39:37.822358', 'step': 1726, 'epoch': 1} +{'type': 'loss', 'content': 0.011330981738865376, 'timestamp': '2025-09-10 02:39:37.833221', 'step': 1727, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:39:37.888160', 'step': 1727, 'epoch': 1} +{'type': 'loss', 'content': 0.03494782745838165, 'timestamp': '2025-09-10 02:39:37.894168', 'step': 1728, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:39:37.946601', 'step': 1728, 'epoch': 1} +{'type': 'loss', 'content': 0.011625164188444614, 'timestamp': '2025-09-10 02:39:37.948882', 'step': 1729, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:39:38.001641', 'step': 1729, 'epoch': 1} +{'type': 'loss', 'content': 0.004412360489368439, 'timestamp': '2025-09-10 02:39:38.003882', 'step': 1730, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:39:38.057813', 'step': 1730, 'epoch': 1} +{'type': 'loss', 'content': 0.011759593151509762, 'timestamp': '2025-09-10 02:39:38.059865', 'step': 1731, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:39:38.113391', 'step': 1731, 'epoch': 1} +{'type': 'loss', 'content': 0.009773151949048042, 'timestamp': '2025-09-10 02:39:38.119527', 'step': 1732, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-09-10 02:39:38.186000', 'step': 1732, 'epoch': 1} +{'type': 'loss', 'content': 0.055666714906692505, 'timestamp': '2025-09-10 02:39:38.199622', 'step': 1733, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:39:38.253956', 'step': 1733, 'epoch': 1} +{'type': 'loss', 'content': 0.006420874036848545, 'timestamp': '2025-09-10 02:39:38.256360', 'step': 1734, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:39:38.309605', 'step': 1734, 'epoch': 1} +{'type': 'loss', 'content': 0.01741214655339718, 'timestamp': '2025-09-10 02:39:38.312342', 'step': 1735, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:39:38.365530', 'step': 1735, 'epoch': 1} +{'type': 'loss', 'content': 0.020671477541327477, 'timestamp': '2025-09-10 02:39:38.371634', 'step': 1736, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:39:38.423940', 'step': 1736, 'epoch': 1} +{'type': 'loss', 'content': 0.03731471672654152, 'timestamp': '2025-09-10 02:39:38.426052', 'step': 1737, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:39:38.479661', 'step': 1737, 'epoch': 1} +{'type': 'loss', 'content': 0.003349814098328352, 'timestamp': '2025-09-10 02:39:38.481872', 'step': 1738, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:39:38.539854', 'step': 1738, 'epoch': 1} +{'type': 'loss', 'content': 0.011552114970982075, 'timestamp': '2025-09-10 02:39:38.550337', 'step': 1739, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:39:38.604696', 'step': 1739, 'epoch': 1} +{'type': 'loss', 'content': 0.004306585993617773, 'timestamp': '2025-09-10 02:39:38.610919', 'step': 1740, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:39:38.663627', 'step': 1740, 'epoch': 1} +{'type': 'loss', 'content': 0.003052855608984828, 'timestamp': '2025-09-10 02:39:38.666353', 'step': 1741, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:39:38.721349', 'step': 1741, 'epoch': 1} +{'type': 'loss', 'content': 0.002096347976475954, 'timestamp': '2025-09-10 02:39:38.723475', 'step': 1742, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 608], 'flops': 12160073886080.0}, 'timestamp': '2025-09-10 02:39:38.814954', 'step': 1742, 'epoch': 1} +{'type': 'loss', 'content': 0.02477678470313549, 'timestamp': '2025-09-10 02:39:38.832036', 'step': 1743, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:39:38.885364', 'step': 1743, 'epoch': 1} +{'type': 'loss', 'content': 0.004138125106692314, 'timestamp': '2025-09-10 02:39:38.891484', 'step': 1744, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:39:38.944356', 'step': 1744, 'epoch': 1} +{'type': 'loss', 'content': 0.0016858422895893455, 'timestamp': '2025-09-10 02:39:38.954382', 'step': 1745, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:39:39.008325', 'step': 1745, 'epoch': 1} +{'type': 'loss', 'content': 0.005374787840992212, 'timestamp': '2025-09-10 02:39:39.010380', 'step': 1746, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:39:39.065980', 'step': 1746, 'epoch': 1} +{'type': 'loss', 'content': 0.0028152461163699627, 'timestamp': '2025-09-10 02:39:39.067847', 'step': 1747, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:39:39.121350', 'step': 1747, 'epoch': 1} +{'type': 'loss', 'content': 0.0034364103339612484, 'timestamp': '2025-09-10 02:39:39.127249', 'step': 1748, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:39:39.180242', 'step': 1748, 'epoch': 1} +{'type': 'loss', 'content': 0.012752421200275421, 'timestamp': '2025-09-10 02:39:39.182328', 'step': 1749, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:39:39.235189', 'step': 1749, 'epoch': 1} +{'type': 'loss', 'content': 0.008162710815668106, 'timestamp': '2025-09-10 02:39:39.237377', 'step': 1750, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 448], 'flops': 8960054460160.0}, 'timestamp': '2025-09-10 02:39:39.307226', 'step': 1750, 'epoch': 1} +{'type': 'loss', 'content': 0.013295854441821575, 'timestamp': '2025-09-10 02:39:39.320126', 'step': 1751, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:39:39.380805', 'step': 1751, 'epoch': 1} +{'type': 'loss', 'content': 0.014668701216578484, 'timestamp': '2025-09-10 02:39:39.392436', 'step': 1752, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:39:39.445908', 'step': 1752, 'epoch': 1} +{'type': 'loss', 'content': 0.007448400370776653, 'timestamp': '2025-09-10 02:39:39.452376', 'step': 1753, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:39:39.506197', 'step': 1753, 'epoch': 1} +{'type': 'loss', 'content': 0.01767665334045887, 'timestamp': '2025-09-10 02:39:39.512771', 'step': 1754, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:39:39.566734', 'step': 1754, 'epoch': 1} +{'type': 'loss', 'content': 0.002524305135011673, 'timestamp': '2025-09-10 02:39:39.569733', 'step': 1755, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:39:39.623244', 'step': 1755, 'epoch': 1} +{'type': 'loss', 'content': 0.022315729409456253, 'timestamp': '2025-09-10 02:39:39.629361', 'step': 1756, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:39:39.682071', 'step': 1756, 'epoch': 1} +{'type': 'loss', 'content': 0.004506973084062338, 'timestamp': '2025-09-10 02:39:39.684245', 'step': 1757, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:39:39.737964', 'step': 1757, 'epoch': 1} +{'type': 'loss', 'content': 0.031227421015501022, 'timestamp': '2025-09-10 02:39:39.739827', 'step': 1758, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:39:39.792790', 'step': 1758, 'epoch': 1} +{'type': 'loss', 'content': 0.01290964987128973, 'timestamp': '2025-09-10 02:39:39.794797', 'step': 1759, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:39:39.848931', 'step': 1759, 'epoch': 1} +{'type': 'loss', 'content': 0.008333380334079266, 'timestamp': '2025-09-10 02:39:39.857203', 'step': 1760, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:39:39.910184', 'step': 1760, 'epoch': 1} +{'type': 'loss', 'content': 0.005028809420764446, 'timestamp': '2025-09-10 02:39:39.912481', 'step': 1761, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:39:39.966345', 'step': 1761, 'epoch': 1} +{'type': 'loss', 'content': 0.0037227787543088198, 'timestamp': '2025-09-10 02:39:39.975990', 'step': 1762, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:39:40.029373', 'step': 1762, 'epoch': 1} +{'type': 'loss', 'content': 0.010441971011459827, 'timestamp': '2025-09-10 02:39:40.031721', 'step': 1763, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:39:40.084630', 'step': 1763, 'epoch': 1} +{'type': 'loss', 'content': 0.009730793535709381, 'timestamp': '2025-09-10 02:39:40.092140', 'step': 1764, 'epoch': 1} +{'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 736], 'batch_size': 8, 'flops': 14691612894976}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10859018244352}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 624], 'batch_size': 8, 'flops': 12455932682112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11497784019456}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8623338031488}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8623338031488}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10859018244352}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 656], 'batch_size': 8, 'flops': 13094698457216}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 560], 'batch_size': 8, 'flops': 11178401131904}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11497784019456}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 1168], 'batch_size': 8, 'flops': 23314950858880}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 640], 'batch_size': 8, 'flops': 12775315569664}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [6, 208], 'batch_size': 8, 'flops': 4151977605760}], 'timestamp': '2025-09-10 02:39:57.278370', 'step': 1764, 'epoch': 1} +{'type': 'pplx', 'content': 23444422.734878693, 'timestamp': '2025-09-10 02:39:57.281336', 'step': 1764, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:39:57.335376', 'step': 1764, 'epoch': 1} +{'type': 'loss', 'content': 0.007947385311126709, 'timestamp': '2025-09-10 02:39:57.337325', 'step': 1765, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:39:57.391345', 'step': 1765, 'epoch': 1} +{'type': 'loss', 'content': 0.01799796335399151, 'timestamp': '2025-09-10 02:39:57.393363', 'step': 1766, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:39:57.447909', 'step': 1766, 'epoch': 1} +{'type': 'loss', 'content': 0.011859401129186153, 'timestamp': '2025-09-10 02:39:57.453227', 'step': 1767, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:39:57.506581', 'step': 1767, 'epoch': 1} +{'type': 'loss', 'content': 0.00828345213085413, 'timestamp': '2025-09-10 02:39:57.512553', 'step': 1768, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:39:57.565353', 'step': 1768, 'epoch': 1} +{'type': 'loss', 'content': 0.0010933061130344868, 'timestamp': '2025-09-10 02:39:57.567446', 'step': 1769, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:39:57.620634', 'step': 1769, 'epoch': 1} +{'type': 'loss', 'content': 0.012491731904447079, 'timestamp': '2025-09-10 02:39:57.624032', 'step': 1770, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:39:57.678784', 'step': 1770, 'epoch': 1} +{'type': 'loss', 'content': 0.026579495519399643, 'timestamp': '2025-09-10 02:39:57.680934', 'step': 1771, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:39:57.734443', 'step': 1771, 'epoch': 1} +{'type': 'loss', 'content': 0.004869979806244373, 'timestamp': '2025-09-10 02:39:57.740255', 'step': 1772, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:39:57.792629', 'step': 1772, 'epoch': 1} +{'type': 'loss', 'content': 0.008269052952528, 'timestamp': '2025-09-10 02:39:57.802681', 'step': 1773, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:39:57.856097', 'step': 1773, 'epoch': 1} +{'type': 'loss', 'content': 0.0023649216163903475, 'timestamp': '2025-09-10 02:39:57.858283', 'step': 1774, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:39:57.912210', 'step': 1774, 'epoch': 1} +{'type': 'loss', 'content': 0.013124794699251652, 'timestamp': '2025-09-10 02:39:57.914418', 'step': 1775, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:39:57.967996', 'step': 1775, 'epoch': 1} +{'type': 'loss', 'content': 0.005433118902146816, 'timestamp': '2025-09-10 02:39:57.974281', 'step': 1776, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:39:58.031112', 'step': 1776, 'epoch': 1} +{'type': 'loss', 'content': 0.036824531853199005, 'timestamp': '2025-09-10 02:39:58.042321', 'step': 1777, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:39:58.096448', 'step': 1777, 'epoch': 1} +{'type': 'loss', 'content': 0.009437480941414833, 'timestamp': '2025-09-10 02:39:58.104293', 'step': 1778, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:39:58.157966', 'step': 1778, 'epoch': 1} +{'type': 'loss', 'content': 0.06587214767932892, 'timestamp': '2025-09-10 02:39:58.160157', 'step': 1779, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:39:58.213369', 'step': 1779, 'epoch': 1} +{'type': 'loss', 'content': 0.034657519310712814, 'timestamp': '2025-09-10 02:39:58.222235', 'step': 1780, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-09-10 02:39:58.282402', 'step': 1780, 'epoch': 1} +{'type': 'loss', 'content': 0.008408496156334877, 'timestamp': '2025-09-10 02:39:58.294371', 'step': 1781, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:39:58.347901', 'step': 1781, 'epoch': 1} +{'type': 'loss', 'content': 0.0036837959196418524, 'timestamp': '2025-09-10 02:39:58.355985', 'step': 1782, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:39:58.408962', 'step': 1782, 'epoch': 1} +{'type': 'loss', 'content': 0.023239104077219963, 'timestamp': '2025-09-10 02:39:58.411021', 'step': 1783, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-09-10 02:39:58.477186', 'step': 1783, 'epoch': 1} +{'type': 'loss', 'content': 0.058617573231458664, 'timestamp': '2025-09-10 02:39:58.490214', 'step': 1784, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:39:58.543189', 'step': 1784, 'epoch': 1} +{'type': 'loss', 'content': 0.013705234043300152, 'timestamp': '2025-09-10 02:39:58.551000', 'step': 1785, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:39:58.604011', 'step': 1785, 'epoch': 1} +{'type': 'loss', 'content': 0.005106168333441019, 'timestamp': '2025-09-10 02:39:58.605991', 'step': 1786, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:39:58.658833', 'step': 1786, 'epoch': 1} +{'type': 'loss', 'content': 0.0134356077760458, 'timestamp': '2025-09-10 02:39:58.660883', 'step': 1787, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:39:58.713037', 'step': 1787, 'epoch': 1} +{'type': 'loss', 'content': 0.013329902663826942, 'timestamp': '2025-09-10 02:39:58.719041', 'step': 1788, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:39:58.771308', 'step': 1788, 'epoch': 1} +{'type': 'loss', 'content': 0.005628393497318029, 'timestamp': '2025-09-10 02:39:58.773603', 'step': 1789, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:39:58.826065', 'step': 1789, 'epoch': 1} +{'type': 'loss', 'content': 0.008878232911229134, 'timestamp': '2025-09-10 02:39:58.828323', 'step': 1790, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-09-10 02:39:58.894544', 'step': 1790, 'epoch': 1} +{'type': 'loss', 'content': 0.010674390010535717, 'timestamp': '2025-09-10 02:39:58.906803', 'step': 1791, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:39:58.959780', 'step': 1791, 'epoch': 1} +{'type': 'loss', 'content': 0.006005376577377319, 'timestamp': '2025-09-10 02:39:58.967076', 'step': 1792, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-09-10 02:39:59.027161', 'step': 1792, 'epoch': 1} +{'type': 'loss', 'content': 0.037848882377147675, 'timestamp': '2025-09-10 02:39:59.039181', 'step': 1793, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:39:59.092192', 'step': 1793, 'epoch': 1} +{'type': 'loss', 'content': 0.007080061826854944, 'timestamp': '2025-09-10 02:39:59.094541', 'step': 1794, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:39:59.147247', 'step': 1794, 'epoch': 1} +{'type': 'loss', 'content': 0.008358953520655632, 'timestamp': '2025-09-10 02:39:59.149286', 'step': 1795, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:39:59.202119', 'step': 1795, 'epoch': 1} +{'type': 'loss', 'content': 0.023781998082995415, 'timestamp': '2025-09-10 02:39:59.207752', 'step': 1796, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:39:59.260042', 'step': 1796, 'epoch': 1} +{'type': 'loss', 'content': 0.00034054036950692534, 'timestamp': '2025-09-10 02:39:59.262180', 'step': 1797, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:39:59.315466', 'step': 1797, 'epoch': 1} +{'type': 'loss', 'content': 0.028512893244624138, 'timestamp': '2025-09-10 02:39:59.317577', 'step': 1798, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:39:59.370398', 'step': 1798, 'epoch': 1} +{'type': 'loss', 'content': 0.019109424203634262, 'timestamp': '2025-09-10 02:39:59.372704', 'step': 1799, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:39:59.426479', 'step': 1799, 'epoch': 1} +{'type': 'loss', 'content': 0.024009615182876587, 'timestamp': '2025-09-10 02:39:59.436829', 'step': 1800, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:39:59.493523', 'step': 1800, 'epoch': 1} +{'type': 'loss', 'content': 0.030184131115674973, 'timestamp': '2025-09-10 02:39:59.504708', 'step': 1801, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:39:59.562638', 'step': 1801, 'epoch': 1} +{'type': 'loss', 'content': 0.0018377343658357859, 'timestamp': '2025-09-10 02:39:59.573076', 'step': 1802, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:39:59.626122', 'step': 1802, 'epoch': 1} +{'type': 'loss', 'content': 0.0016195416683331132, 'timestamp': '2025-09-10 02:39:59.629578', 'step': 1803, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:39:59.685345', 'step': 1803, 'epoch': 1} +{'type': 'loss', 'content': 0.007264415267854929, 'timestamp': '2025-09-10 02:39:59.691284', 'step': 1804, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:39:59.743371', 'step': 1804, 'epoch': 1} +{'type': 'loss', 'content': 0.014265798963606358, 'timestamp': '2025-09-10 02:39:59.745575', 'step': 1805, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:39:59.798488', 'step': 1805, 'epoch': 1} +{'type': 'loss', 'content': 0.002825483214110136, 'timestamp': '2025-09-10 02:39:59.800974', 'step': 1806, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:39:59.858687', 'step': 1806, 'epoch': 1} +{'type': 'loss', 'content': 0.0029959457460790873, 'timestamp': '2025-09-10 02:39:59.869117', 'step': 1807, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:39:59.922112', 'step': 1807, 'epoch': 1} +{'type': 'loss', 'content': 0.002854627091437578, 'timestamp': '2025-09-10 02:39:59.927924', 'step': 1808, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:39:59.980147', 'step': 1808, 'epoch': 1} +{'type': 'loss', 'content': 0.0220880676060915, 'timestamp': '2025-09-10 02:39:59.988483', 'step': 1809, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:40:00.041041', 'step': 1809, 'epoch': 1} +{'type': 'loss', 'content': 0.03752187639474869, 'timestamp': '2025-09-10 02:40:00.047598', 'step': 1810, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:40:00.100387', 'step': 1810, 'epoch': 1} +{'type': 'loss', 'content': 0.002775424625724554, 'timestamp': '2025-09-10 02:40:00.102557', 'step': 1811, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:40:00.155872', 'step': 1811, 'epoch': 1} +{'type': 'loss', 'content': 0.006643721368163824, 'timestamp': '2025-09-10 02:40:00.166226', 'step': 1812, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:40:00.218967', 'step': 1812, 'epoch': 1} +{'type': 'loss', 'content': 0.0041780597530305386, 'timestamp': '2025-09-10 02:40:00.221115', 'step': 1813, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:40:00.274015', 'step': 1813, 'epoch': 1} +{'type': 'loss', 'content': 0.01680164411664009, 'timestamp': '2025-09-10 02:40:00.280499', 'step': 1814, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:40:00.333624', 'step': 1814, 'epoch': 1} +{'type': 'loss', 'content': 0.005910966079682112, 'timestamp': '2025-09-10 02:40:00.343266', 'step': 1815, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:40:00.396359', 'step': 1815, 'epoch': 1} +{'type': 'loss', 'content': 0.012250907719135284, 'timestamp': '2025-09-10 02:40:00.402129', 'step': 1816, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:40:00.461660', 'step': 1816, 'epoch': 1} +{'type': 'loss', 'content': 0.02061128430068493, 'timestamp': '2025-09-10 02:40:00.473284', 'step': 1817, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:40:00.526428', 'step': 1817, 'epoch': 1} +{'type': 'loss', 'content': 0.017202133312821388, 'timestamp': '2025-09-10 02:40:00.533004', 'step': 1818, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:40:00.585773', 'step': 1818, 'epoch': 1} +{'type': 'loss', 'content': 0.0474618636071682, 'timestamp': '2025-09-10 02:40:00.587777', 'step': 1819, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:40:00.640253', 'step': 1819, 'epoch': 1} +{'type': 'loss', 'content': 0.010387985035777092, 'timestamp': '2025-09-10 02:40:00.649286', 'step': 1820, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:40:00.700953', 'step': 1820, 'epoch': 1} +{'type': 'loss', 'content': 0.004137419629842043, 'timestamp': '2025-09-10 02:40:00.703352', 'step': 1821, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-09-10 02:40:00.765449', 'step': 1821, 'epoch': 1} +{'type': 'loss', 'content': 0.007008475251495838, 'timestamp': '2025-09-10 02:40:00.776555', 'step': 1822, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:40:00.829935', 'step': 1822, 'epoch': 1} +{'type': 'loss', 'content': 0.02086186222732067, 'timestamp': '2025-09-10 02:40:00.831950', 'step': 1823, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:40:00.886211', 'step': 1823, 'epoch': 1} +{'type': 'loss', 'content': 0.02719251997768879, 'timestamp': '2025-09-10 02:40:00.896806', 'step': 1824, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:40:00.952178', 'step': 1824, 'epoch': 1} +{'type': 'loss', 'content': 0.014970486052334309, 'timestamp': '2025-09-10 02:40:00.955634', 'step': 1825, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:40:01.008770', 'step': 1825, 'epoch': 1} +{'type': 'loss', 'content': 0.038791872560977936, 'timestamp': '2025-09-10 02:40:01.011973', 'step': 1826, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:40:01.065077', 'step': 1826, 'epoch': 1} +{'type': 'loss', 'content': 0.015346644446253777, 'timestamp': '2025-09-10 02:40:01.067218', 'step': 1827, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:40:01.119835', 'step': 1827, 'epoch': 1} +{'type': 'loss', 'content': 0.030021127313375473, 'timestamp': '2025-09-10 02:40:01.125503', 'step': 1828, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:40:01.177691', 'step': 1828, 'epoch': 1} +{'type': 'loss', 'content': 0.013160435482859612, 'timestamp': '2025-09-10 02:40:01.179834', 'step': 1829, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:40:01.233121', 'step': 1829, 'epoch': 1} +{'type': 'loss', 'content': 0.00401655025780201, 'timestamp': '2025-09-10 02:40:01.239295', 'step': 1830, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:40:01.291948', 'step': 1830, 'epoch': 1} +{'type': 'loss', 'content': 0.0012732602190226316, 'timestamp': '2025-09-10 02:40:01.294242', 'step': 1831, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:40:01.347306', 'step': 1831, 'epoch': 1} +{'type': 'loss', 'content': 0.01764599420130253, 'timestamp': '2025-09-10 02:40:01.353190', 'step': 1832, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:40:01.405447', 'step': 1832, 'epoch': 1} +{'type': 'loss', 'content': 0.007014024071395397, 'timestamp': '2025-09-10 02:40:01.407781', 'step': 1833, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:40:01.460412', 'step': 1833, 'epoch': 1} +{'type': 'loss', 'content': 0.013560867868363857, 'timestamp': '2025-09-10 02:40:01.463280', 'step': 1834, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:40:01.516631', 'step': 1834, 'epoch': 1} +{'type': 'loss', 'content': 0.004703877028077841, 'timestamp': '2025-09-10 02:40:01.519566', 'step': 1835, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:40:01.572773', 'step': 1835, 'epoch': 1} +{'type': 'loss', 'content': 0.047076888382434845, 'timestamp': '2025-09-10 02:40:01.579803', 'step': 1836, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:40:01.632375', 'step': 1836, 'epoch': 1} +{'type': 'loss', 'content': 0.0168437696993351, 'timestamp': '2025-09-10 02:40:01.634776', 'step': 1837, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:40:01.688465', 'step': 1837, 'epoch': 1} +{'type': 'loss', 'content': 0.009931335225701332, 'timestamp': '2025-09-10 02:40:01.698060', 'step': 1838, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:40:01.750606', 'step': 1838, 'epoch': 1} +{'type': 'loss', 'content': 0.025910858064889908, 'timestamp': '2025-09-10 02:40:01.752595', 'step': 1839, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:40:01.805252', 'step': 1839, 'epoch': 1} +{'type': 'loss', 'content': 0.0036237796302884817, 'timestamp': '2025-09-10 02:40:01.815577', 'step': 1840, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:40:01.867781', 'step': 1840, 'epoch': 1} +{'type': 'loss', 'content': 0.010877500288188457, 'timestamp': '2025-09-10 02:40:01.870018', 'step': 1841, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:40:01.922919', 'step': 1841, 'epoch': 1} +{'type': 'loss', 'content': 0.010570277459919453, 'timestamp': '2025-09-10 02:40:01.924997', 'step': 1842, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:40:01.977444', 'step': 1842, 'epoch': 1} +{'type': 'loss', 'content': 0.010649800300598145, 'timestamp': '2025-09-10 02:40:01.980429', 'step': 1843, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:40:02.033051', 'step': 1843, 'epoch': 1} +{'type': 'loss', 'content': 0.013479354791343212, 'timestamp': '2025-09-10 02:40:02.038913', 'step': 1844, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:40:02.090901', 'step': 1844, 'epoch': 1} +{'type': 'loss', 'content': 0.01640235260128975, 'timestamp': '2025-09-10 02:40:02.093181', 'step': 1845, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:40:02.145508', 'step': 1845, 'epoch': 1} +{'type': 'loss', 'content': 0.013565384782850742, 'timestamp': '2025-09-10 02:40:02.148507', 'step': 1846, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:40:02.201624', 'step': 1846, 'epoch': 1} +{'type': 'loss', 'content': 0.007320315111428499, 'timestamp': '2025-09-10 02:40:02.209608', 'step': 1847, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:40:02.270017', 'step': 1847, 'epoch': 1} +{'type': 'loss', 'content': 0.014638463035225868, 'timestamp': '2025-09-10 02:40:02.281513', 'step': 1848, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:40:02.334573', 'step': 1848, 'epoch': 1} +{'type': 'loss', 'content': 0.021155206486582756, 'timestamp': '2025-09-10 02:40:02.337117', 'step': 1849, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:40:02.390112', 'step': 1849, 'epoch': 1} +{'type': 'loss', 'content': 0.01687222719192505, 'timestamp': '2025-09-10 02:40:02.392457', 'step': 1850, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:40:02.445413', 'step': 1850, 'epoch': 1} +{'type': 'loss', 'content': 0.02835913561284542, 'timestamp': '2025-09-10 02:40:02.447776', 'step': 1851, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-09-10 02:40:02.515530', 'step': 1851, 'epoch': 1} +{'type': 'loss', 'content': 0.021335987374186516, 'timestamp': '2025-09-10 02:40:02.528911', 'step': 1852, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 848], 'flops': 16960103024960.0}, 'timestamp': '2025-09-10 02:40:02.649967', 'step': 1852, 'epoch': 1} +{'type': 'loss', 'content': 0.0033114091493189335, 'timestamp': '2025-09-10 02:40:02.676240', 'step': 1853, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:40:02.730670', 'step': 1853, 'epoch': 1} +{'type': 'loss', 'content': 0.008411731570959091, 'timestamp': '2025-09-10 02:40:02.740484', 'step': 1854, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:40:02.793419', 'step': 1854, 'epoch': 1} +{'type': 'loss', 'content': 0.018416091799736023, 'timestamp': '2025-09-10 02:40:02.795596', 'step': 1855, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:40:02.848384', 'step': 1855, 'epoch': 1} +{'type': 'loss', 'content': 0.010702029801905155, 'timestamp': '2025-09-10 02:40:02.854418', 'step': 1856, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:40:02.912254', 'step': 1856, 'epoch': 1} +{'type': 'loss', 'content': 0.01161882746964693, 'timestamp': '2025-09-10 02:40:02.918883', 'step': 1857, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:40:02.973696', 'step': 1857, 'epoch': 1} +{'type': 'loss', 'content': 0.01350684929639101, 'timestamp': '2025-09-10 02:40:02.975500', 'step': 1858, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:40:03.039442', 'step': 1858, 'epoch': 1} +{'type': 'loss', 'content': 0.019679781049489975, 'timestamp': '2025-09-10 02:40:03.046144', 'step': 1859, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-09-10 02:40:03.107187', 'step': 1859, 'epoch': 1} +{'type': 'loss', 'content': 0.02340617962181568, 'timestamp': '2025-09-10 02:40:03.119020', 'step': 1860, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:40:03.171773', 'step': 1860, 'epoch': 1} +{'type': 'loss', 'content': 0.012109608389437199, 'timestamp': '2025-09-10 02:40:03.174030', 'step': 1861, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 512], 'flops': 10240062230528.0}, 'timestamp': '2025-09-10 02:40:03.249546', 'step': 1861, 'epoch': 1} +{'type': 'loss', 'content': 0.008661939762532711, 'timestamp': '2025-09-10 02:40:03.263599', 'step': 1862, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-09-10 02:40:03.337949', 'step': 1862, 'epoch': 1} +{'type': 'loss', 'content': 0.0033197938464581966, 'timestamp': '2025-09-10 02:40:03.350635', 'step': 1863, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:40:03.403715', 'step': 1863, 'epoch': 1} +{'type': 'loss', 'content': 0.016732338815927505, 'timestamp': '2025-09-10 02:40:03.409746', 'step': 1864, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:40:03.466538', 'step': 1864, 'epoch': 1} +{'type': 'loss', 'content': 0.0185225922614336, 'timestamp': '2025-09-10 02:40:03.477041', 'step': 1865, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:40:03.530585', 'step': 1865, 'epoch': 1} +{'type': 'loss', 'content': 0.0076630981639027596, 'timestamp': '2025-09-10 02:40:03.533488', 'step': 1866, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:40:03.586623', 'step': 1866, 'epoch': 1} +{'type': 'loss', 'content': 0.02388727478682995, 'timestamp': '2025-09-10 02:40:03.594743', 'step': 1867, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:40:03.647676', 'step': 1867, 'epoch': 1} +{'type': 'loss', 'content': 0.019633090123534203, 'timestamp': '2025-09-10 02:40:03.656575', 'step': 1868, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:40:03.708791', 'step': 1868, 'epoch': 1} +{'type': 'loss', 'content': 0.02341005578637123, 'timestamp': '2025-09-10 02:40:03.710816', 'step': 1869, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:40:03.768692', 'step': 1869, 'epoch': 1} +{'type': 'loss', 'content': 0.007239409722387791, 'timestamp': '2025-09-10 02:40:03.779152', 'step': 1870, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:40:03.832114', 'step': 1870, 'epoch': 1} +{'type': 'loss', 'content': 0.011209309101104736, 'timestamp': '2025-09-10 02:40:03.834269', 'step': 1871, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:40:03.887175', 'step': 1871, 'epoch': 1} +{'type': 'loss', 'content': 0.032616887241601944, 'timestamp': '2025-09-10 02:40:03.894601', 'step': 1872, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:40:03.947072', 'step': 1872, 'epoch': 1} +{'type': 'loss', 'content': 0.00542307598516345, 'timestamp': '2025-09-10 02:40:03.949371', 'step': 1873, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:40:04.001493', 'step': 1873, 'epoch': 1} +{'type': 'loss', 'content': 0.004950906150043011, 'timestamp': '2025-09-10 02:40:04.003633', 'step': 1874, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-09-10 02:40:04.070033', 'step': 1874, 'epoch': 1} +{'type': 'loss', 'content': 0.014805043116211891, 'timestamp': '2025-09-10 02:40:04.082230', 'step': 1875, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:40:04.135464', 'step': 1875, 'epoch': 1} +{'type': 'loss', 'content': 0.005124398972839117, 'timestamp': '2025-09-10 02:40:04.141116', 'step': 1876, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:40:04.194098', 'step': 1876, 'epoch': 1} +{'type': 'loss', 'content': 0.01372546423226595, 'timestamp': '2025-09-10 02:40:04.204618', 'step': 1877, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:40:04.257718', 'step': 1877, 'epoch': 1} +{'type': 'loss', 'content': 0.05634238198399544, 'timestamp': '2025-09-10 02:40:04.264139', 'step': 1878, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:40:04.317267', 'step': 1878, 'epoch': 1} +{'type': 'loss', 'content': 0.01247911062091589, 'timestamp': '2025-09-10 02:40:04.319533', 'step': 1879, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-09-10 02:40:04.385503', 'step': 1879, 'epoch': 1} +{'type': 'loss', 'content': 0.00997505895793438, 'timestamp': '2025-09-10 02:40:04.398540', 'step': 1880, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:40:04.451137', 'step': 1880, 'epoch': 1} +{'type': 'loss', 'content': 0.008485841564834118, 'timestamp': '2025-09-10 02:40:04.453921', 'step': 1881, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:40:04.507481', 'step': 1881, 'epoch': 1} +{'type': 'loss', 'content': 0.021304922178387642, 'timestamp': '2025-09-10 02:40:04.509806', 'step': 1882, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:40:04.570944', 'step': 1882, 'epoch': 1} +{'type': 'loss', 'content': 0.01408555917441845, 'timestamp': '2025-09-10 02:40:04.581616', 'step': 1883, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:40:04.635445', 'step': 1883, 'epoch': 1} +{'type': 'loss', 'content': 0.01440160907804966, 'timestamp': '2025-09-10 02:40:04.640963', 'step': 1884, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:40:04.694230', 'step': 1884, 'epoch': 1} +{'type': 'loss', 'content': 0.004871371667832136, 'timestamp': '2025-09-10 02:40:04.695981', 'step': 1885, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:40:04.750987', 'step': 1885, 'epoch': 1} +{'type': 'loss', 'content': 0.02186504192650318, 'timestamp': '2025-09-10 02:40:04.760776', 'step': 1886, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:40:04.814094', 'step': 1886, 'epoch': 1} +{'type': 'loss', 'content': 0.006802795920521021, 'timestamp': '2025-09-10 02:40:04.816099', 'step': 1887, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:40:04.868759', 'step': 1887, 'epoch': 1} +{'type': 'loss', 'content': 0.010723703540861607, 'timestamp': '2025-09-10 02:40:04.874705', 'step': 1888, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:40:04.926605', 'step': 1888, 'epoch': 1} +{'type': 'loss', 'content': 0.019888320937752724, 'timestamp': '2025-09-10 02:40:04.929725', 'step': 1889, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:40:04.982402', 'step': 1889, 'epoch': 1} +{'type': 'loss', 'content': 0.012764276005327702, 'timestamp': '2025-09-10 02:40:04.984675', 'step': 1890, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:40:05.037489', 'step': 1890, 'epoch': 1} +{'type': 'loss', 'content': 0.004333410877734423, 'timestamp': '2025-09-10 02:40:05.040329', 'step': 1891, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:40:05.093020', 'step': 1891, 'epoch': 1} +{'type': 'loss', 'content': 0.0036067774053663015, 'timestamp': '2025-09-10 02:40:05.098544', 'step': 1892, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:40:05.151069', 'step': 1892, 'epoch': 1} +{'type': 'loss', 'content': 0.00459138723090291, 'timestamp': '2025-09-10 02:40:05.154046', 'step': 1893, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 656], 'flops': 13120079713856.0}, 'timestamp': '2025-09-10 02:40:05.250242', 'step': 1893, 'epoch': 1} +{'type': 'loss', 'content': 0.014671608805656433, 'timestamp': '2025-09-10 02:40:05.268685', 'step': 1894, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:40:05.323673', 'step': 1894, 'epoch': 1} +{'type': 'loss', 'content': 0.010200425051152706, 'timestamp': '2025-09-10 02:40:05.331230', 'step': 1895, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:40:05.384682', 'step': 1895, 'epoch': 1} +{'type': 'loss', 'content': 0.019077172502875328, 'timestamp': '2025-09-10 02:40:05.390882', 'step': 1896, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-09-10 02:40:05.457493', 'step': 1896, 'epoch': 1} +{'type': 'loss', 'content': 0.007850597612559795, 'timestamp': '2025-09-10 02:40:05.471113', 'step': 1897, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:40:05.524315', 'step': 1897, 'epoch': 1} +{'type': 'loss', 'content': 0.0023193396627902985, 'timestamp': '2025-09-10 02:40:05.526465', 'step': 1898, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:40:05.579958', 'step': 1898, 'epoch': 1} +{'type': 'loss', 'content': 0.006891167256981134, 'timestamp': '2025-09-10 02:40:05.582890', 'step': 1899, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:40:05.636906', 'step': 1899, 'epoch': 1} +{'type': 'loss', 'content': 0.008234263397753239, 'timestamp': '2025-09-10 02:40:05.647293', 'step': 1900, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-09-10 02:40:05.710951', 'step': 1900, 'epoch': 1} +{'type': 'loss', 'content': 0.0036044667940586805, 'timestamp': '2025-09-10 02:40:05.722705', 'step': 1901, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:40:05.776641', 'step': 1901, 'epoch': 1} +{'type': 'loss', 'content': 0.001770059927366674, 'timestamp': '2025-09-10 02:40:05.786234', 'step': 1902, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:40:05.840202', 'step': 1902, 'epoch': 1} +{'type': 'loss', 'content': 0.0038121454417705536, 'timestamp': '2025-09-10 02:40:05.846217', 'step': 1903, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:40:05.906715', 'step': 1903, 'epoch': 1} +{'type': 'loss', 'content': 0.0055883280001580715, 'timestamp': '2025-09-10 02:40:05.918246', 'step': 1904, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:40:05.971992', 'step': 1904, 'epoch': 1} +{'type': 'loss', 'content': 0.016487419605255127, 'timestamp': '2025-09-10 02:40:05.974858', 'step': 1905, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:40:06.027284', 'step': 1905, 'epoch': 1} +{'type': 'loss', 'content': 0.01049624290317297, 'timestamp': '2025-09-10 02:40:06.029292', 'step': 1906, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:40:06.083699', 'step': 1906, 'epoch': 1} +{'type': 'loss', 'content': 0.0060119954869151115, 'timestamp': '2025-09-10 02:40:06.093534', 'step': 1907, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:40:06.146765', 'step': 1907, 'epoch': 1} +{'type': 'loss', 'content': 0.00729665532708168, 'timestamp': '2025-09-10 02:40:06.154015', 'step': 1908, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:40:06.206271', 'step': 1908, 'epoch': 1} +{'type': 'loss', 'content': 0.005614326801151037, 'timestamp': '2025-09-10 02:40:06.208039', 'step': 1909, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:40:06.260623', 'step': 1909, 'epoch': 1} +{'type': 'loss', 'content': 0.0018541625468060374, 'timestamp': '2025-09-10 02:40:06.262517', 'step': 1910, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:40:06.315959', 'step': 1910, 'epoch': 1} +{'type': 'loss', 'content': 0.00960423331707716, 'timestamp': '2025-09-10 02:40:06.322350', 'step': 1911, 'epoch': 1} +{'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 736], 'batch_size': 8, 'flops': 14691612894976}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10859018244352}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 624], 'batch_size': 8, 'flops': 12455932682112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11497784019456}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8623338031488}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8623338031488}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10859018244352}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 656], 'batch_size': 8, 'flops': 13094698457216}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 560], 'batch_size': 8, 'flops': 11178401131904}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11497784019456}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 1168], 'batch_size': 8, 'flops': 23314950858880}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 640], 'batch_size': 8, 'flops': 12775315569664}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [6, 208], 'batch_size': 8, 'flops': 4151977605760}], 'timestamp': '2025-09-10 02:40:23.158724', 'step': 1911, 'epoch': 1} +{'type': 'pplx', 'content': 24056173.55783693, 'timestamp': '2025-09-10 02:40:23.161872', 'step': 1911, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:40:23.218843', 'step': 1911, 'epoch': 1} +{'type': 'loss', 'content': 0.014825215563178062, 'timestamp': '2025-09-10 02:40:23.230026', 'step': 1912, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:40:23.283028', 'step': 1912, 'epoch': 1} +{'type': 'loss', 'content': 0.0014569023624062538, 'timestamp': '2025-09-10 02:40:23.285459', 'step': 1913, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:40:23.338469', 'step': 1913, 'epoch': 1} +{'type': 'loss', 'content': 0.03071923367679119, 'timestamp': '2025-09-10 02:40:23.340508', 'step': 1914, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:40:23.393905', 'step': 1914, 'epoch': 1} +{'type': 'loss', 'content': 0.011707932688295841, 'timestamp': '2025-09-10 02:40:23.395971', 'step': 1915, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:40:23.456336', 'step': 1915, 'epoch': 1} +{'type': 'loss', 'content': 0.03220689669251442, 'timestamp': '2025-09-10 02:40:23.467856', 'step': 1916, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:40:23.522949', 'step': 1916, 'epoch': 1} +{'type': 'loss', 'content': 0.017585042864084244, 'timestamp': '2025-09-10 02:40:23.524832', 'step': 1917, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:40:23.578339', 'step': 1917, 'epoch': 1} +{'type': 'loss', 'content': 0.01379451435059309, 'timestamp': '2025-09-10 02:40:23.580389', 'step': 1918, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:40:23.634096', 'step': 1918, 'epoch': 1} +{'type': 'loss', 'content': 0.018169602379202843, 'timestamp': '2025-09-10 02:40:23.639976', 'step': 1919, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:40:23.693777', 'step': 1919, 'epoch': 1} +{'type': 'loss', 'content': 0.022639570757746696, 'timestamp': '2025-09-10 02:40:23.699886', 'step': 1920, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:40:23.752216', 'step': 1920, 'epoch': 1} +{'type': 'loss', 'content': 0.005521832965314388, 'timestamp': '2025-09-10 02:40:23.754985', 'step': 1921, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-09-10 02:40:23.816751', 'step': 1921, 'epoch': 1} +{'type': 'loss', 'content': 0.004009797237813473, 'timestamp': '2025-09-10 02:40:23.827864', 'step': 1922, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:40:23.880482', 'step': 1922, 'epoch': 1} +{'type': 'loss', 'content': 0.04552547261118889, 'timestamp': '2025-09-10 02:40:23.882414', 'step': 1923, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:40:23.934892', 'step': 1923, 'epoch': 1} +{'type': 'loss', 'content': 0.02368207462131977, 'timestamp': '2025-09-10 02:40:23.940653', 'step': 1924, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:40:23.993454', 'step': 1924, 'epoch': 1} +{'type': 'loss', 'content': 0.005057979375123978, 'timestamp': '2025-09-10 02:40:24.003498', 'step': 1925, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:40:24.058857', 'step': 1925, 'epoch': 1} +{'type': 'loss', 'content': 0.01649058423936367, 'timestamp': '2025-09-10 02:40:24.061064', 'step': 1926, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:40:24.114324', 'step': 1926, 'epoch': 1} +{'type': 'loss', 'content': 0.012464964762330055, 'timestamp': '2025-09-10 02:40:24.116376', 'step': 1927, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:40:24.169759', 'step': 1927, 'epoch': 1} +{'type': 'loss', 'content': 0.01559925265610218, 'timestamp': '2025-09-10 02:40:24.175670', 'step': 1928, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:40:24.227904', 'step': 1928, 'epoch': 1} +{'type': 'loss', 'content': 0.029018137603998184, 'timestamp': '2025-09-10 02:40:24.238166', 'step': 1929, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:40:24.293016', 'step': 1929, 'epoch': 1} +{'type': 'loss', 'content': 0.0177223589271307, 'timestamp': '2025-09-10 02:40:24.302753', 'step': 1930, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:40:24.355894', 'step': 1930, 'epoch': 1} +{'type': 'loss', 'content': 0.0032372507266700268, 'timestamp': '2025-09-10 02:40:24.362554', 'step': 1931, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:40:24.415006', 'step': 1931, 'epoch': 1} +{'type': 'loss', 'content': 0.0042731501162052155, 'timestamp': '2025-09-10 02:40:24.424257', 'step': 1932, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:40:24.476856', 'step': 1932, 'epoch': 1} +{'type': 'loss', 'content': 0.017299598082900047, 'timestamp': '2025-09-10 02:40:24.483564', 'step': 1933, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:40:24.537636', 'step': 1933, 'epoch': 1} +{'type': 'loss', 'content': 0.02613774500787258, 'timestamp': '2025-09-10 02:40:24.544334', 'step': 1934, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:40:24.597695', 'step': 1934, 'epoch': 1} +{'type': 'loss', 'content': 0.027100956067442894, 'timestamp': '2025-09-10 02:40:24.599828', 'step': 1935, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:40:24.652294', 'step': 1935, 'epoch': 1} +{'type': 'loss', 'content': 0.012455842457711697, 'timestamp': '2025-09-10 02:40:24.658209', 'step': 1936, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:40:24.724219', 'step': 1936, 'epoch': 1} +{'type': 'loss', 'content': 0.005860353820025921, 'timestamp': '2025-09-10 02:40:24.734452', 'step': 1937, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:40:24.788113', 'step': 1937, 'epoch': 1} +{'type': 'loss', 'content': 0.001896485686302185, 'timestamp': '2025-09-10 02:40:24.797731', 'step': 1938, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:40:24.851069', 'step': 1938, 'epoch': 1} +{'type': 'loss', 'content': 0.02183610573410988, 'timestamp': '2025-09-10 02:40:24.853067', 'step': 1939, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:40:24.906646', 'step': 1939, 'epoch': 1} +{'type': 'loss', 'content': 0.005698530934751034, 'timestamp': '2025-09-10 02:40:24.912313', 'step': 1940, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:40:24.964092', 'step': 1940, 'epoch': 1} +{'type': 'loss', 'content': 0.002761783543974161, 'timestamp': '2025-09-10 02:40:24.965800', 'step': 1941, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:40:25.018164', 'step': 1941, 'epoch': 1} +{'type': 'loss', 'content': 0.03263257071375847, 'timestamp': '2025-09-10 02:40:25.020267', 'step': 1942, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:40:25.073043', 'step': 1942, 'epoch': 1} +{'type': 'loss', 'content': 0.00514122424647212, 'timestamp': '2025-09-10 02:40:25.074908', 'step': 1943, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:40:25.127574', 'step': 1943, 'epoch': 1} +{'type': 'loss', 'content': 0.003783545223996043, 'timestamp': '2025-09-10 02:40:25.134944', 'step': 1944, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:40:25.186780', 'step': 1944, 'epoch': 1} +{'type': 'loss', 'content': 0.0033572230022400618, 'timestamp': '2025-09-10 02:40:25.188884', 'step': 1945, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:40:25.241947', 'step': 1945, 'epoch': 1} +{'type': 'loss', 'content': 0.0178757943212986, 'timestamp': '2025-09-10 02:40:25.248543', 'step': 1946, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:40:25.301799', 'step': 1946, 'epoch': 1} +{'type': 'loss', 'content': 0.01767478697001934, 'timestamp': '2025-09-10 02:40:25.311395', 'step': 1947, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:40:25.365027', 'step': 1947, 'epoch': 1} +{'type': 'loss', 'content': 0.03325825557112694, 'timestamp': '2025-09-10 02:40:25.370599', 'step': 1948, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:40:25.422800', 'step': 1948, 'epoch': 1} +{'type': 'loss', 'content': 0.008630207739770412, 'timestamp': '2025-09-10 02:40:25.425028', 'step': 1949, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:40:25.478012', 'step': 1949, 'epoch': 1} +{'type': 'loss', 'content': 0.012927801348268986, 'timestamp': '2025-09-10 02:40:25.480097', 'step': 1950, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:40:25.532402', 'step': 1950, 'epoch': 1} +{'type': 'loss', 'content': 0.01747622713446617, 'timestamp': '2025-09-10 02:40:25.534547', 'step': 1951, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:40:25.586979', 'step': 1951, 'epoch': 1} +{'type': 'loss', 'content': 0.00799343641847372, 'timestamp': '2025-09-10 02:40:25.593033', 'step': 1952, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:40:25.645898', 'step': 1952, 'epoch': 1} +{'type': 'loss', 'content': 0.006082185078412294, 'timestamp': '2025-09-10 02:40:25.652239', 'step': 1953, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:40:25.706314', 'step': 1953, 'epoch': 1} +{'type': 'loss', 'content': 0.004440020304173231, 'timestamp': '2025-09-10 02:40:25.708843', 'step': 1954, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:40:25.761772', 'step': 1954, 'epoch': 1} +{'type': 'loss', 'content': 0.002935823518782854, 'timestamp': '2025-09-10 02:40:25.768105', 'step': 1955, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:40:25.820739', 'step': 1955, 'epoch': 1} +{'type': 'loss', 'content': 0.009733036160469055, 'timestamp': '2025-09-10 02:40:25.826670', 'step': 1956, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-09-10 02:40:25.887079', 'step': 1956, 'epoch': 1} +{'type': 'loss', 'content': 0.01457192562520504, 'timestamp': '2025-09-10 02:40:25.898838', 'step': 1957, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:40:25.957224', 'step': 1957, 'epoch': 1} +{'type': 'loss', 'content': 0.005866926163434982, 'timestamp': '2025-09-10 02:40:25.967657', 'step': 1958, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:40:26.020790', 'step': 1958, 'epoch': 1} +{'type': 'loss', 'content': 0.0014265580102801323, 'timestamp': '2025-09-10 02:40:26.023624', 'step': 1959, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:40:26.077558', 'step': 1959, 'epoch': 1} +{'type': 'loss', 'content': 0.020798334851861, 'timestamp': '2025-09-10 02:40:26.087950', 'step': 1960, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:40:26.140441', 'step': 1960, 'epoch': 1} +{'type': 'loss', 'content': 0.002828251337632537, 'timestamp': '2025-09-10 02:40:26.142792', 'step': 1961, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:40:26.195785', 'step': 1961, 'epoch': 1} +{'type': 'loss', 'content': 0.0038729310035705566, 'timestamp': '2025-09-10 02:40:26.198704', 'step': 1962, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:40:26.252061', 'step': 1962, 'epoch': 1} +{'type': 'loss', 'content': 0.007973147556185722, 'timestamp': '2025-09-10 02:40:26.254226', 'step': 1963, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:40:26.308028', 'step': 1963, 'epoch': 1} +{'type': 'loss', 'content': 0.01782756857573986, 'timestamp': '2025-09-10 02:40:26.314847', 'step': 1964, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:40:26.368500', 'step': 1964, 'epoch': 1} +{'type': 'loss', 'content': 0.009090019389986992, 'timestamp': '2025-09-10 02:40:26.370564', 'step': 1965, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:40:26.424040', 'step': 1965, 'epoch': 1} +{'type': 'loss', 'content': 0.01522249635308981, 'timestamp': '2025-09-10 02:40:26.426215', 'step': 1966, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:40:26.480652', 'step': 1966, 'epoch': 1} +{'type': 'loss', 'content': 0.010693386197090149, 'timestamp': '2025-09-10 02:40:26.490543', 'step': 1967, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:40:26.543914', 'step': 1967, 'epoch': 1} +{'type': 'loss', 'content': 0.0013078663032501936, 'timestamp': '2025-09-10 02:40:26.550095', 'step': 1968, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:40:26.602813', 'step': 1968, 'epoch': 1} +{'type': 'loss', 'content': 0.02390674129128456, 'timestamp': '2025-09-10 02:40:26.604947', 'step': 1969, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:40:26.657694', 'step': 1969, 'epoch': 1} +{'type': 'loss', 'content': 0.01774417981505394, 'timestamp': '2025-09-10 02:40:26.659845', 'step': 1970, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:40:26.712806', 'step': 1970, 'epoch': 1} +{'type': 'loss', 'content': 0.02107955515384674, 'timestamp': '2025-09-10 02:40:26.714926', 'step': 1971, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:40:26.768454', 'step': 1971, 'epoch': 1} +{'type': 'loss', 'content': 0.011796504259109497, 'timestamp': '2025-09-10 02:40:26.778935', 'step': 1972, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:40:26.831230', 'step': 1972, 'epoch': 1} +{'type': 'loss', 'content': 0.006210298743098974, 'timestamp': '2025-09-10 02:40:26.833224', 'step': 1973, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-09-10 02:40:26.894962', 'step': 1973, 'epoch': 1} +{'type': 'loss', 'content': 0.010983224026858807, 'timestamp': '2025-09-10 02:40:26.906057', 'step': 1974, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:40:26.959127', 'step': 1974, 'epoch': 1} +{'type': 'loss', 'content': 0.0164656862616539, 'timestamp': '2025-09-10 02:40:26.961333', 'step': 1975, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:40:27.014441', 'step': 1975, 'epoch': 1} +{'type': 'loss', 'content': 0.0045163072645664215, 'timestamp': '2025-09-10 02:40:27.020596', 'step': 1976, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-09-10 02:40:27.087791', 'step': 1976, 'epoch': 1} +{'type': 'loss', 'content': 0.01173480600118637, 'timestamp': '2025-09-10 02:40:27.101499', 'step': 1977, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:40:27.155148', 'step': 1977, 'epoch': 1} +{'type': 'loss', 'content': 0.05862661823630333, 'timestamp': '2025-09-10 02:40:27.157976', 'step': 1978, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:40:27.211676', 'step': 1978, 'epoch': 1} +{'type': 'loss', 'content': 0.00327744847163558, 'timestamp': '2025-09-10 02:40:27.217681', 'step': 1979, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:40:27.270452', 'step': 1979, 'epoch': 1} +{'type': 'loss', 'content': 0.0009931438835337758, 'timestamp': '2025-09-10 02:40:27.276350', 'step': 1980, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:40:27.328971', 'step': 1980, 'epoch': 1} +{'type': 'loss', 'content': 0.0451321043074131, 'timestamp': '2025-09-10 02:40:27.331008', 'step': 1981, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:40:27.383610', 'step': 1981, 'epoch': 1} +{'type': 'loss', 'content': 0.018608104437589645, 'timestamp': '2025-09-10 02:40:27.385689', 'step': 1982, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 448], 'flops': 8960054460160.0}, 'timestamp': '2025-09-10 02:40:27.455604', 'step': 1982, 'epoch': 1} +{'type': 'loss', 'content': 0.005149955861270428, 'timestamp': '2025-09-10 02:40:27.468476', 'step': 1983, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:40:27.520993', 'step': 1983, 'epoch': 1} +{'type': 'loss', 'content': 0.04596984386444092, 'timestamp': '2025-09-10 02:40:27.526992', 'step': 1984, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:40:27.579735', 'step': 1984, 'epoch': 1} +{'type': 'loss', 'content': 0.007791085634380579, 'timestamp': '2025-09-10 02:40:27.581620', 'step': 1985, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:40:27.634521', 'step': 1985, 'epoch': 1} +{'type': 'loss', 'content': 0.009980211034417152, 'timestamp': '2025-09-10 02:40:27.636600', 'step': 1986, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:40:27.689577', 'step': 1986, 'epoch': 1} +{'type': 'loss', 'content': 0.02932649478316307, 'timestamp': '2025-09-10 02:40:27.691837', 'step': 1987, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:40:27.745076', 'step': 1987, 'epoch': 1} +{'type': 'loss', 'content': 0.004920269828289747, 'timestamp': '2025-09-10 02:40:27.751098', 'step': 1988, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:40:27.803776', 'step': 1988, 'epoch': 1} +{'type': 'loss', 'content': 0.007863566279411316, 'timestamp': '2025-09-10 02:40:27.805711', 'step': 1989, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:40:27.858536', 'step': 1989, 'epoch': 1} +{'type': 'loss', 'content': 0.014759792014956474, 'timestamp': '2025-09-10 02:40:27.865030', 'step': 1990, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:40:27.918231', 'step': 1990, 'epoch': 1} +{'type': 'loss', 'content': 0.00929697509855032, 'timestamp': '2025-09-10 02:40:27.920521', 'step': 1991, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:40:27.975027', 'step': 1991, 'epoch': 1} +{'type': 'loss', 'content': 0.01565462164580822, 'timestamp': '2025-09-10 02:40:27.985619', 'step': 1992, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:40:28.037768', 'step': 1992, 'epoch': 1} +{'type': 'loss', 'content': 0.017104636877775192, 'timestamp': '2025-09-10 02:40:28.039982', 'step': 1993, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:40:28.093153', 'step': 1993, 'epoch': 1} +{'type': 'loss', 'content': 0.002158787567168474, 'timestamp': '2025-09-10 02:40:28.095125', 'step': 1994, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:40:28.148034', 'step': 1994, 'epoch': 1} +{'type': 'loss', 'content': 0.0019636712968349457, 'timestamp': '2025-09-10 02:40:28.150230', 'step': 1995, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:40:28.203161', 'step': 1995, 'epoch': 1} +{'type': 'loss', 'content': 0.014361539855599403, 'timestamp': '2025-09-10 02:40:28.209025', 'step': 1996, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:40:28.261114', 'step': 1996, 'epoch': 1} +{'type': 'loss', 'content': 0.005031212233006954, 'timestamp': '2025-09-10 02:40:28.264074', 'step': 1997, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:40:28.316944', 'step': 1997, 'epoch': 1} +{'type': 'loss', 'content': 0.001798151759430766, 'timestamp': '2025-09-10 02:40:28.323540', 'step': 1998, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:40:28.376162', 'step': 1998, 'epoch': 1} +{'type': 'loss', 'content': 0.004470630548894405, 'timestamp': '2025-09-10 02:40:28.378097', 'step': 1999, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:40:28.431539', 'step': 1999, 'epoch': 1} +{'type': 'loss', 'content': 0.014826876111328602, 'timestamp': '2025-09-10 02:40:28.441936', 'step': 2000, 'epoch': 1} +{'type': 'info', 'content': 'Checkpoint saved at step 2000', 'timestamp': '2025-09-10 02:40:28.806360', 'step': 2000, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:40:28.861348', 'step': 2000, 'epoch': 1} +{'type': 'loss', 'content': 0.005624215584248304, 'timestamp': '2025-09-10 02:40:28.869124', 'step': 2001, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:40:28.922948', 'step': 2001, 'epoch': 1} +{'type': 'loss', 'content': 0.006075866054743528, 'timestamp': '2025-09-10 02:40:28.924936', 'step': 2002, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:40:28.977993', 'step': 2002, 'epoch': 1} +{'type': 'loss', 'content': 0.004470535088330507, 'timestamp': '2025-09-10 02:40:28.980104', 'step': 2003, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:40:29.032984', 'step': 2003, 'epoch': 1} +{'type': 'loss', 'content': 0.014739048667252064, 'timestamp': '2025-09-10 02:40:29.039098', 'step': 2004, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:40:29.092326', 'step': 2004, 'epoch': 1} +{'type': 'loss', 'content': 0.0006459683063440025, 'timestamp': '2025-09-10 02:40:29.102913', 'step': 2005, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:40:29.155962', 'step': 2005, 'epoch': 1} +{'type': 'loss', 'content': 0.007557640317827463, 'timestamp': '2025-09-10 02:40:29.158049', 'step': 2006, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:40:29.211024', 'step': 2006, 'epoch': 1} +{'type': 'loss', 'content': 0.03876524418592453, 'timestamp': '2025-09-10 02:40:29.213043', 'step': 2007, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:40:29.266270', 'step': 2007, 'epoch': 1} +{'type': 'loss', 'content': 0.0018814915092661977, 'timestamp': '2025-09-10 02:40:29.274961', 'step': 2008, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:40:29.327012', 'step': 2008, 'epoch': 1} +{'type': 'loss', 'content': 0.06593555212020874, 'timestamp': '2025-09-10 02:40:29.333537', 'step': 2009, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:40:29.386951', 'step': 2009, 'epoch': 1} +{'type': 'loss', 'content': 0.0020403151866048574, 'timestamp': '2025-09-10 02:40:29.389811', 'step': 2010, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:40:29.442137', 'step': 2010, 'epoch': 1} +{'type': 'loss', 'content': 0.0033842516131699085, 'timestamp': '2025-09-10 02:40:29.444278', 'step': 2011, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:40:29.497009', 'step': 2011, 'epoch': 1} +{'type': 'loss', 'content': 0.00728357071056962, 'timestamp': '2025-09-10 02:40:29.502992', 'step': 2012, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:40:29.555775', 'step': 2012, 'epoch': 1} +{'type': 'loss', 'content': 0.03079916164278984, 'timestamp': '2025-09-10 02:40:29.557790', 'step': 2013, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:40:29.610985', 'step': 2013, 'epoch': 1} +{'type': 'loss', 'content': 0.07625376433134079, 'timestamp': '2025-09-10 02:40:29.612951', 'step': 2014, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:40:29.666191', 'step': 2014, 'epoch': 1} +{'type': 'loss', 'content': 0.001167031587101519, 'timestamp': '2025-09-10 02:40:29.668384', 'step': 2015, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:40:29.721156', 'step': 2015, 'epoch': 1} +{'type': 'loss', 'content': 0.014314286410808563, 'timestamp': '2025-09-10 02:40:29.727091', 'step': 2016, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:40:29.779107', 'step': 2016, 'epoch': 1} +{'type': 'loss', 'content': 0.014441040344536304, 'timestamp': '2025-09-10 02:40:29.781155', 'step': 2017, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:40:29.833680', 'step': 2017, 'epoch': 1} +{'type': 'loss', 'content': 0.008695382624864578, 'timestamp': '2025-09-10 02:40:29.836827', 'step': 2018, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:40:29.889332', 'step': 2018, 'epoch': 1} +{'type': 'loss', 'content': 0.016935424879193306, 'timestamp': '2025-09-10 02:40:29.891605', 'step': 2019, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:40:29.944543', 'step': 2019, 'epoch': 1} +{'type': 'loss', 'content': 0.0068209171295166016, 'timestamp': '2025-09-10 02:40:29.950429', 'step': 2020, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:40:30.003480', 'step': 2020, 'epoch': 1} +{'type': 'loss', 'content': 0.02026510424911976, 'timestamp': '2025-09-10 02:40:30.010980', 'step': 2021, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:40:30.071495', 'step': 2021, 'epoch': 1} +{'type': 'loss', 'content': 0.01842624880373478, 'timestamp': '2025-09-10 02:40:30.082184', 'step': 2022, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:40:30.143131', 'step': 2022, 'epoch': 1} +{'type': 'loss', 'content': 0.033158693462610245, 'timestamp': '2025-09-10 02:40:30.153815', 'step': 2023, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:40:30.207737', 'step': 2023, 'epoch': 1} +{'type': 'loss', 'content': 0.0011084345169365406, 'timestamp': '2025-09-10 02:40:30.214780', 'step': 2024, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:40:30.273442', 'step': 2024, 'epoch': 1} +{'type': 'loss', 'content': 0.004500050563365221, 'timestamp': '2025-09-10 02:40:30.285031', 'step': 2025, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:40:30.338633', 'step': 2025, 'epoch': 1} +{'type': 'loss', 'content': 0.00649598240852356, 'timestamp': '2025-09-10 02:40:30.341661', 'step': 2026, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:40:30.395178', 'step': 2026, 'epoch': 1} +{'type': 'loss', 'content': 0.008045542985200882, 'timestamp': '2025-09-10 02:40:30.397350', 'step': 2027, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:40:30.450292', 'step': 2027, 'epoch': 1} +{'type': 'loss', 'content': 0.009977241978049278, 'timestamp': '2025-09-10 02:40:30.456240', 'step': 2028, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:40:30.508720', 'step': 2028, 'epoch': 1} +{'type': 'loss', 'content': 0.005319902207702398, 'timestamp': '2025-09-10 02:40:30.510784', 'step': 2029, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:40:30.563647', 'step': 2029, 'epoch': 1} +{'type': 'loss', 'content': 0.054678674787282944, 'timestamp': '2025-09-10 02:40:30.565900', 'step': 2030, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:40:30.618888', 'step': 2030, 'epoch': 1} +{'type': 'loss', 'content': 0.019794004037976265, 'timestamp': '2025-09-10 02:40:30.621846', 'step': 2031, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:40:30.679506', 'step': 2031, 'epoch': 1} +{'type': 'loss', 'content': 0.0349082425236702, 'timestamp': '2025-09-10 02:40:30.690856', 'step': 2032, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:40:30.745018', 'step': 2032, 'epoch': 1} +{'type': 'loss', 'content': 0.0052823289297521114, 'timestamp': '2025-09-10 02:40:30.748108', 'step': 2033, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:40:30.802787', 'step': 2033, 'epoch': 1} +{'type': 'loss', 'content': 0.01749844290316105, 'timestamp': '2025-09-10 02:40:30.812501', 'step': 2034, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:40:30.866184', 'step': 2034, 'epoch': 1} +{'type': 'loss', 'content': 0.008537224493920803, 'timestamp': '2025-09-10 02:40:30.868578', 'step': 2035, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 528], 'flops': 10560064173120.0}, 'timestamp': '2025-09-10 02:40:30.948942', 'step': 2035, 'epoch': 1} +{'type': 'loss', 'content': 0.012161684222519398, 'timestamp': '2025-09-10 02:40:30.964786', 'step': 2036, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:40:31.017952', 'step': 2036, 'epoch': 1} +{'type': 'loss', 'content': 0.015352214686572552, 'timestamp': '2025-09-10 02:40:31.019928', 'step': 2037, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:40:31.077785', 'step': 2037, 'epoch': 1} +{'type': 'loss', 'content': 0.012421314604580402, 'timestamp': '2025-09-10 02:40:31.088226', 'step': 2038, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:40:31.141507', 'step': 2038, 'epoch': 1} +{'type': 'loss', 'content': 0.014564496465027332, 'timestamp': '2025-09-10 02:40:31.143756', 'step': 2039, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:40:31.197445', 'step': 2039, 'epoch': 1} +{'type': 'loss', 'content': 0.024082129821181297, 'timestamp': '2025-09-10 02:40:31.203492', 'step': 2040, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:40:31.256660', 'step': 2040, 'epoch': 1} +{'type': 'loss', 'content': 0.014478149823844433, 'timestamp': '2025-09-10 02:40:31.258781', 'step': 2041, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-09-10 02:40:31.320304', 'step': 2041, 'epoch': 1} +{'type': 'loss', 'content': 0.019619962200522423, 'timestamp': '2025-09-10 02:40:31.331245', 'step': 2042, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:40:31.384981', 'step': 2042, 'epoch': 1} +{'type': 'loss', 'content': 0.006779780145734549, 'timestamp': '2025-09-10 02:40:31.387119', 'step': 2043, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-09-10 02:40:31.454926', 'step': 2043, 'epoch': 1} +{'type': 'loss', 'content': 0.0027434879448264837, 'timestamp': '2025-09-10 02:40:31.468292', 'step': 2044, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:40:31.526393', 'step': 2044, 'epoch': 1} +{'type': 'loss', 'content': 0.0017053645569831133, 'timestamp': '2025-09-10 02:40:31.537583', 'step': 2045, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:40:31.591233', 'step': 2045, 'epoch': 1} +{'type': 'loss', 'content': 0.014673621393740177, 'timestamp': '2025-09-10 02:40:31.593385', 'step': 2046, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:40:31.646329', 'step': 2046, 'epoch': 1} +{'type': 'loss', 'content': 0.01856462098658085, 'timestamp': '2025-09-10 02:40:31.648813', 'step': 2047, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:40:31.702231', 'step': 2047, 'epoch': 1} +{'type': 'loss', 'content': 0.017966721206903458, 'timestamp': '2025-09-10 02:40:31.709367', 'step': 2048, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:40:31.762069', 'step': 2048, 'epoch': 1} +{'type': 'loss', 'content': 0.023420805111527443, 'timestamp': '2025-09-10 02:40:31.764310', 'step': 2049, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:40:31.817365', 'step': 2049, 'epoch': 1} +{'type': 'loss', 'content': 0.026248861104249954, 'timestamp': '2025-09-10 02:40:31.819460', 'step': 2050, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:40:31.877177', 'step': 2050, 'epoch': 1} +{'type': 'loss', 'content': 0.03295387700200081, 'timestamp': '2025-09-10 02:40:31.887597', 'step': 2051, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:40:31.941811', 'step': 2051, 'epoch': 1} +{'type': 'loss', 'content': 0.014977223239839077, 'timestamp': '2025-09-10 02:40:31.947854', 'step': 2052, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:40:32.000312', 'step': 2052, 'epoch': 1} +{'type': 'loss', 'content': 0.0006909515359438956, 'timestamp': '2025-09-10 02:40:32.002452', 'step': 2053, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:40:32.055476', 'step': 2053, 'epoch': 1} +{'type': 'loss', 'content': 0.0030724534299224615, 'timestamp': '2025-09-10 02:40:32.063602', 'step': 2054, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:40:32.117521', 'step': 2054, 'epoch': 1} +{'type': 'loss', 'content': 0.009656290523707867, 'timestamp': '2025-09-10 02:40:32.119540', 'step': 2055, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:40:32.172823', 'step': 2055, 'epoch': 1} +{'type': 'loss', 'content': 0.026681674644351006, 'timestamp': '2025-09-10 02:40:32.178727', 'step': 2056, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:40:32.231883', 'step': 2056, 'epoch': 1} +{'type': 'loss', 'content': 0.012981426902115345, 'timestamp': '2025-09-10 02:40:32.242015', 'step': 2057, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:40:32.296889', 'step': 2057, 'epoch': 1} +{'type': 'loss', 'content': 0.006506530102342367, 'timestamp': '2025-09-10 02:40:32.302560', 'step': 2058, 'epoch': 1} +{'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 736], 'batch_size': 8, 'flops': 14691612894976}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10859018244352}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 624], 'batch_size': 8, 'flops': 12455932682112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11497784019456}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8623338031488}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8623338031488}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10859018244352}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 656], 'batch_size': 8, 'flops': 13094698457216}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 560], 'batch_size': 8, 'flops': 11178401131904}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11497784019456}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 1168], 'batch_size': 8, 'flops': 23314950858880}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 640], 'batch_size': 8, 'flops': 12775315569664}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [6, 208], 'batch_size': 8, 'flops': 4151977605760}], 'timestamp': '2025-09-10 02:40:49.113422', 'step': 2058, 'epoch': 1} +{'type': 'pplx', 'content': 26482598.556206435, 'timestamp': '2025-09-10 02:40:49.116552', 'step': 2058, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:40:49.172178', 'step': 2058, 'epoch': 1} +{'type': 'loss', 'content': 0.024873873218894005, 'timestamp': '2025-09-10 02:40:49.175258', 'step': 2059, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:40:49.233670', 'step': 2059, 'epoch': 1} +{'type': 'loss', 'content': 0.0064423284493386745, 'timestamp': '2025-09-10 02:40:49.244883', 'step': 2060, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:40:49.298280', 'step': 2060, 'epoch': 1} +{'type': 'loss', 'content': 0.037066757678985596, 'timestamp': '2025-09-10 02:40:49.307988', 'step': 2061, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:40:49.362062', 'step': 2061, 'epoch': 1} +{'type': 'loss', 'content': 0.008213195949792862, 'timestamp': '2025-09-10 02:40:49.364425', 'step': 2062, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:40:49.417815', 'step': 2062, 'epoch': 1} +{'type': 'loss', 'content': 0.021275460720062256, 'timestamp': '2025-09-10 02:40:49.424527', 'step': 2063, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:40:49.481192', 'step': 2063, 'epoch': 1} +{'type': 'loss', 'content': 0.011230670846998692, 'timestamp': '2025-09-10 02:40:49.487036', 'step': 2064, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:40:49.539236', 'step': 2064, 'epoch': 1} +{'type': 'loss', 'content': 0.006780553143471479, 'timestamp': '2025-09-10 02:40:49.544108', 'step': 2065, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:40:49.601060', 'step': 2065, 'epoch': 1} +{'type': 'loss', 'content': 0.007425476796925068, 'timestamp': '2025-09-10 02:40:49.608155', 'step': 2066, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:40:49.665936', 'step': 2066, 'epoch': 1} +{'type': 'loss', 'content': 0.021900055930018425, 'timestamp': '2025-09-10 02:40:49.675526', 'step': 2067, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:40:49.730962', 'step': 2067, 'epoch': 1} +{'type': 'loss', 'content': 0.004908385220915079, 'timestamp': '2025-09-10 02:40:49.741371', 'step': 2068, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:40:49.814731', 'step': 2068, 'epoch': 1} +{'type': 'loss', 'content': 0.011295017786324024, 'timestamp': '2025-09-10 02:40:49.817684', 'step': 2069, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:40:49.872430', 'step': 2069, 'epoch': 1} +{'type': 'loss', 'content': 0.003216175129637122, 'timestamp': '2025-09-10 02:40:49.880617', 'step': 2070, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:40:49.935081', 'step': 2070, 'epoch': 1} +{'type': 'loss', 'content': 0.0031660795211791992, 'timestamp': '2025-09-10 02:40:49.941930', 'step': 2071, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:40:49.996444', 'step': 2071, 'epoch': 1} +{'type': 'loss', 'content': 0.006437338888645172, 'timestamp': '2025-09-10 02:40:50.013093', 'step': 2072, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:40:50.069767', 'step': 2072, 'epoch': 1} +{'type': 'loss', 'content': 0.020404119044542313, 'timestamp': '2025-09-10 02:40:50.076080', 'step': 2073, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:40:50.138601', 'step': 2073, 'epoch': 1} +{'type': 'loss', 'content': 0.004126450512558222, 'timestamp': '2025-09-10 02:40:50.148246', 'step': 2074, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:40:50.228133', 'step': 2074, 'epoch': 1} +{'type': 'loss', 'content': 0.0178743414580822, 'timestamp': '2025-09-10 02:40:50.238820', 'step': 2075, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:40:50.294582', 'step': 2075, 'epoch': 1} +{'type': 'loss', 'content': 0.01718183420598507, 'timestamp': '2025-09-10 02:40:50.302841', 'step': 2076, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:40:50.373769', 'step': 2076, 'epoch': 1} +{'type': 'loss', 'content': 0.0029403779190033674, 'timestamp': '2025-09-10 02:40:50.376717', 'step': 2077, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:40:50.449271', 'step': 2077, 'epoch': 1} +{'type': 'loss', 'content': 0.005355836357921362, 'timestamp': '2025-09-10 02:40:50.453516', 'step': 2078, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:40:50.516547', 'step': 2078, 'epoch': 1} +{'type': 'loss', 'content': 0.007841928862035275, 'timestamp': '2025-09-10 02:40:50.520189', 'step': 2079, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 512], 'flops': 10240062230528.0}, 'timestamp': '2025-09-10 02:40:50.597780', 'step': 2079, 'epoch': 1} +{'type': 'loss', 'content': 0.0016193120973184705, 'timestamp': '2025-09-10 02:40:50.612645', 'step': 2080, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:40:50.665594', 'step': 2080, 'epoch': 1} +{'type': 'loss', 'content': 0.008064589463174343, 'timestamp': '2025-09-10 02:40:50.668611', 'step': 2081, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:40:50.721966', 'step': 2081, 'epoch': 1} +{'type': 'loss', 'content': 0.026570020243525505, 'timestamp': '2025-09-10 02:40:50.724252', 'step': 2082, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:40:50.777902', 'step': 2082, 'epoch': 1} +{'type': 'loss', 'content': 0.021081626415252686, 'timestamp': '2025-09-10 02:40:50.787544', 'step': 2083, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:40:50.841123', 'step': 2083, 'epoch': 1} +{'type': 'loss', 'content': 0.00983034260571003, 'timestamp': '2025-09-10 02:40:50.846721', 'step': 2084, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-09-10 02:40:50.911533', 'step': 2084, 'epoch': 1} +{'type': 'loss', 'content': 0.014769394882023335, 'timestamp': '2025-09-10 02:40:50.924732', 'step': 2085, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:40:50.978880', 'step': 2085, 'epoch': 1} +{'type': 'loss', 'content': 0.0013401007745414972, 'timestamp': '2025-09-10 02:40:50.981036', 'step': 2086, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:40:51.034679', 'step': 2086, 'epoch': 1} +{'type': 'loss', 'content': 0.007707037031650543, 'timestamp': '2025-09-10 02:40:51.036886', 'step': 2087, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-09-10 02:40:51.098767', 'step': 2087, 'epoch': 1} +{'type': 'loss', 'content': 0.002891720738261938, 'timestamp': '2025-09-10 02:40:51.110646', 'step': 2088, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:40:51.164562', 'step': 2088, 'epoch': 1} +{'type': 'loss', 'content': 0.04081105440855026, 'timestamp': '2025-09-10 02:40:51.166698', 'step': 2089, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:40:51.221682', 'step': 2089, 'epoch': 1} +{'type': 'loss', 'content': 0.035205941647291183, 'timestamp': '2025-09-10 02:40:51.231520', 'step': 2090, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-09-10 02:40:51.297941', 'step': 2090, 'epoch': 1} +{'type': 'loss', 'content': 0.004805359989404678, 'timestamp': '2025-09-10 02:40:51.310208', 'step': 2091, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:40:51.363441', 'step': 2091, 'epoch': 1} +{'type': 'loss', 'content': 0.009850825183093548, 'timestamp': '2025-09-10 02:40:51.369467', 'step': 2092, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-09-10 02:40:51.434848', 'step': 2092, 'epoch': 1} +{'type': 'loss', 'content': 0.0668928474187851, 'timestamp': '2025-09-10 02:40:51.448062', 'step': 2093, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:40:51.501565', 'step': 2093, 'epoch': 1} +{'type': 'loss', 'content': 0.021095644682645798, 'timestamp': '2025-09-10 02:40:51.504762', 'step': 2094, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:40:51.557952', 'step': 2094, 'epoch': 1} +{'type': 'loss', 'content': 0.02470650151371956, 'timestamp': '2025-09-10 02:40:51.566111', 'step': 2095, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:40:51.619223', 'step': 2095, 'epoch': 1} +{'type': 'loss', 'content': 0.015062669292092323, 'timestamp': '2025-09-10 02:40:51.625078', 'step': 2096, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:40:51.677955', 'step': 2096, 'epoch': 1} +{'type': 'loss', 'content': 0.010145655833184719, 'timestamp': '2025-09-10 02:40:51.684519', 'step': 2097, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:40:51.737896', 'step': 2097, 'epoch': 1} +{'type': 'loss', 'content': 0.008013242855668068, 'timestamp': '2025-09-10 02:40:51.740021', 'step': 2098, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:40:51.793229', 'step': 2098, 'epoch': 1} +{'type': 'loss', 'content': 0.0018540335586294532, 'timestamp': '2025-09-10 02:40:51.802810', 'step': 2099, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:40:51.856652', 'step': 2099, 'epoch': 1} +{'type': 'loss', 'content': 0.008650798350572586, 'timestamp': '2025-09-10 02:40:51.865483', 'step': 2100, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:40:51.917795', 'step': 2100, 'epoch': 1} +{'type': 'loss', 'content': 0.00515222130343318, 'timestamp': '2025-09-10 02:40:51.920953', 'step': 2101, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:40:51.975155', 'step': 2101, 'epoch': 1} +{'type': 'loss', 'content': 0.009928855113685131, 'timestamp': '2025-09-10 02:40:51.977422', 'step': 2102, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:40:52.031787', 'step': 2102, 'epoch': 1} +{'type': 'loss', 'content': 0.013099177740514278, 'timestamp': '2025-09-10 02:40:52.034052', 'step': 2103, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:40:52.086780', 'step': 2103, 'epoch': 1} +{'type': 'loss', 'content': 0.017769407480955124, 'timestamp': '2025-09-10 02:40:52.095945', 'step': 2104, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:40:52.152661', 'step': 2104, 'epoch': 1} +{'type': 'loss', 'content': 0.0073226215317845345, 'timestamp': '2025-09-10 02:40:52.163885', 'step': 2105, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:40:52.217115', 'step': 2105, 'epoch': 1} +{'type': 'loss', 'content': 0.03404330834746361, 'timestamp': '2025-09-10 02:40:52.219416', 'step': 2106, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:40:52.277853', 'step': 2106, 'epoch': 1} +{'type': 'loss', 'content': 0.021174529567360878, 'timestamp': '2025-09-10 02:40:52.288285', 'step': 2107, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:40:52.341070', 'step': 2107, 'epoch': 1} +{'type': 'loss', 'content': 0.013351579196751118, 'timestamp': '2025-09-10 02:40:52.346624', 'step': 2108, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:40:52.398571', 'step': 2108, 'epoch': 1} +{'type': 'loss', 'content': 0.0015658928314223886, 'timestamp': '2025-09-10 02:40:52.405076', 'step': 2109, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:40:52.458803', 'step': 2109, 'epoch': 1} +{'type': 'loss', 'content': 0.005374718923121691, 'timestamp': '2025-09-10 02:40:52.466848', 'step': 2110, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:40:52.525315', 'step': 2110, 'epoch': 1} +{'type': 'loss', 'content': 0.016732919961214066, 'timestamp': '2025-09-10 02:40:52.535758', 'step': 2111, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:40:52.589115', 'step': 2111, 'epoch': 1} +{'type': 'loss', 'content': 0.006754300557076931, 'timestamp': '2025-09-10 02:40:52.595902', 'step': 2112, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:40:52.648472', 'step': 2112, 'epoch': 1} +{'type': 'loss', 'content': 0.011038624681532383, 'timestamp': '2025-09-10 02:40:52.651259', 'step': 2113, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:40:52.704536', 'step': 2113, 'epoch': 1} +{'type': 'loss', 'content': 0.0231216661632061, 'timestamp': '2025-09-10 02:40:52.706882', 'step': 2114, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:40:52.760117', 'step': 2114, 'epoch': 1} +{'type': 'loss', 'content': 0.022676901891827583, 'timestamp': '2025-09-10 02:40:52.762541', 'step': 2115, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:40:52.816024', 'step': 2115, 'epoch': 1} +{'type': 'loss', 'content': 0.01557923387736082, 'timestamp': '2025-09-10 02:40:52.821875', 'step': 2116, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:40:52.874489', 'step': 2116, 'epoch': 1} +{'type': 'loss', 'content': 0.00846847239881754, 'timestamp': '2025-09-10 02:40:52.876937', 'step': 2117, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:40:52.930537', 'step': 2117, 'epoch': 1} +{'type': 'loss', 'content': 0.02981100045144558, 'timestamp': '2025-09-10 02:40:52.932808', 'step': 2118, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:40:52.986485', 'step': 2118, 'epoch': 1} +{'type': 'loss', 'content': 0.022590333595871925, 'timestamp': '2025-09-10 02:40:52.988714', 'step': 2119, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:40:53.042318', 'step': 2119, 'epoch': 1} +{'type': 'loss', 'content': 0.030419545248150826, 'timestamp': '2025-09-10 02:40:53.052714', 'step': 2120, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:40:53.105946', 'step': 2120, 'epoch': 1} +{'type': 'loss', 'content': 0.03492049500346184, 'timestamp': '2025-09-10 02:40:53.107948', 'step': 2121, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:40:53.161050', 'step': 2121, 'epoch': 1} +{'type': 'loss', 'content': 0.015666289255023003, 'timestamp': '2025-09-10 02:40:53.163443', 'step': 2122, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:40:53.216595', 'step': 2122, 'epoch': 1} +{'type': 'loss', 'content': 0.01425112783908844, 'timestamp': '2025-09-10 02:40:53.223277', 'step': 2123, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:40:53.276776', 'step': 2123, 'epoch': 1} +{'type': 'loss', 'content': 0.012315441854298115, 'timestamp': '2025-09-10 02:40:53.282606', 'step': 2124, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:40:53.335064', 'step': 2124, 'epoch': 1} +{'type': 'loss', 'content': 0.059362031519412994, 'timestamp': '2025-09-10 02:40:53.343410', 'step': 2125, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:40:53.398609', 'step': 2125, 'epoch': 1} +{'type': 'loss', 'content': 0.007058224640786648, 'timestamp': '2025-09-10 02:40:53.400668', 'step': 2126, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:40:53.453569', 'step': 2126, 'epoch': 1} +{'type': 'loss', 'content': 0.010253772139549255, 'timestamp': '2025-09-10 02:40:53.455797', 'step': 2127, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:40:53.508779', 'step': 2127, 'epoch': 1} +{'type': 'loss', 'content': 0.001817534677684307, 'timestamp': '2025-09-10 02:40:53.516151', 'step': 2128, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:40:53.568888', 'step': 2128, 'epoch': 1} +{'type': 'loss', 'content': 0.022460997104644775, 'timestamp': '2025-09-10 02:40:53.571191', 'step': 2129, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:40:53.624430', 'step': 2129, 'epoch': 1} +{'type': 'loss', 'content': 0.015433979220688343, 'timestamp': '2025-09-10 02:40:53.626651', 'step': 2130, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:40:53.679649', 'step': 2130, 'epoch': 1} +{'type': 'loss', 'content': 0.007861977443099022, 'timestamp': '2025-09-10 02:40:53.681933', 'step': 2131, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:40:53.734473', 'step': 2131, 'epoch': 1} +{'type': 'loss', 'content': 0.018494803458452225, 'timestamp': '2025-09-10 02:40:53.741861', 'step': 2132, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:40:53.795208', 'step': 2132, 'epoch': 1} +{'type': 'loss', 'content': 0.006038778927177191, 'timestamp': '2025-09-10 02:40:53.805772', 'step': 2133, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:40:53.858888', 'step': 2133, 'epoch': 1} +{'type': 'loss', 'content': 0.00769368140026927, 'timestamp': '2025-09-10 02:40:53.861058', 'step': 2134, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:40:53.914948', 'step': 2134, 'epoch': 1} +{'type': 'loss', 'content': 0.007579221855849028, 'timestamp': '2025-09-10 02:40:53.924541', 'step': 2135, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-09-10 02:40:53.991935', 'step': 2135, 'epoch': 1} +{'type': 'loss', 'content': 0.005548670422285795, 'timestamp': '2025-09-10 02:40:54.004958', 'step': 2136, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-09-10 02:40:54.064983', 'step': 2136, 'epoch': 1} +{'type': 'loss', 'content': 0.012776902876794338, 'timestamp': '2025-09-10 02:40:54.076805', 'step': 2137, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:40:54.129638', 'step': 2137, 'epoch': 1} +{'type': 'loss', 'content': 0.010065835900604725, 'timestamp': '2025-09-10 02:40:54.131690', 'step': 2138, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:40:54.189670', 'step': 2138, 'epoch': 1} +{'type': 'loss', 'content': 0.019276706501841545, 'timestamp': '2025-09-10 02:40:54.200133', 'step': 2139, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:40:54.253056', 'step': 2139, 'epoch': 1} +{'type': 'loss', 'content': 0.012315203435719013, 'timestamp': '2025-09-10 02:40:54.258806', 'step': 2140, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:40:54.310812', 'step': 2140, 'epoch': 1} +{'type': 'loss', 'content': 0.006040751468390226, 'timestamp': '2025-09-10 02:40:54.312837', 'step': 2141, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:40:54.365478', 'step': 2141, 'epoch': 1} +{'type': 'loss', 'content': 0.005082997027784586, 'timestamp': '2025-09-10 02:40:54.372180', 'step': 2142, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:40:54.425890', 'step': 2142, 'epoch': 1} +{'type': 'loss', 'content': 0.009807256050407887, 'timestamp': '2025-09-10 02:40:54.432437', 'step': 2143, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:40:54.490807', 'step': 2143, 'epoch': 1} +{'type': 'loss', 'content': 0.003767900401726365, 'timestamp': '2025-09-10 02:40:54.502042', 'step': 2144, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:40:54.554992', 'step': 2144, 'epoch': 1} +{'type': 'loss', 'content': 0.011249847710132599, 'timestamp': '2025-09-10 02:40:54.557268', 'step': 2145, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:40:54.612999', 'step': 2145, 'epoch': 1} +{'type': 'loss', 'content': 0.008101708255708218, 'timestamp': '2025-09-10 02:40:54.622756', 'step': 2146, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:40:54.677208', 'step': 2146, 'epoch': 1} +{'type': 'loss', 'content': 0.004199292976409197, 'timestamp': '2025-09-10 02:40:54.679656', 'step': 2147, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:40:54.733384', 'step': 2147, 'epoch': 1} +{'type': 'loss', 'content': 0.011460386216640472, 'timestamp': '2025-09-10 02:40:54.739338', 'step': 2148, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:40:54.792358', 'step': 2148, 'epoch': 1} +{'type': 'loss', 'content': 0.010788912884891033, 'timestamp': '2025-09-10 02:40:54.794656', 'step': 2149, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:40:54.846952', 'step': 2149, 'epoch': 1} +{'type': 'loss', 'content': 0.04312171787023544, 'timestamp': '2025-09-10 02:40:54.849091', 'step': 2150, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-09-10 02:40:54.910876', 'step': 2150, 'epoch': 1} +{'type': 'loss', 'content': 0.004354742355644703, 'timestamp': '2025-09-10 02:40:54.921988', 'step': 2151, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:40:54.974912', 'step': 2151, 'epoch': 1} +{'type': 'loss', 'content': 0.008099909871816635, 'timestamp': '2025-09-10 02:40:54.980530', 'step': 2152, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:40:55.033326', 'step': 2152, 'epoch': 1} +{'type': 'loss', 'content': 0.008201665244996548, 'timestamp': '2025-09-10 02:40:55.043798', 'step': 2153, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:40:55.096193', 'step': 2153, 'epoch': 1} +{'type': 'loss', 'content': 0.005976626183837652, 'timestamp': '2025-09-10 02:40:55.099205', 'step': 2154, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:40:55.152582', 'step': 2154, 'epoch': 1} +{'type': 'loss', 'content': 0.0046859607100486755, 'timestamp': '2025-09-10 02:40:55.154925', 'step': 2155, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:40:55.207884', 'step': 2155, 'epoch': 1} +{'type': 'loss', 'content': 0.01543146837502718, 'timestamp': '2025-09-10 02:40:55.213688', 'step': 2156, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-09-10 02:40:55.273217', 'step': 2156, 'epoch': 1} +{'type': 'loss', 'content': 0.007171375211328268, 'timestamp': '2025-09-10 02:40:55.285022', 'step': 2157, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:40:55.339587', 'step': 2157, 'epoch': 1} +{'type': 'loss', 'content': 0.009496279060840607, 'timestamp': '2025-09-10 02:40:55.341636', 'step': 2158, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:40:55.393979', 'step': 2158, 'epoch': 1} +{'type': 'loss', 'content': 0.014618291519582272, 'timestamp': '2025-09-10 02:40:55.396219', 'step': 2159, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:40:55.449356', 'step': 2159, 'epoch': 1} +{'type': 'loss', 'content': 0.004825818818062544, 'timestamp': '2025-09-10 02:40:55.455153', 'step': 2160, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:40:55.507964', 'step': 2160, 'epoch': 1} +{'type': 'loss', 'content': 0.009887455962598324, 'timestamp': '2025-09-10 02:40:55.518244', 'step': 2161, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:40:55.572029', 'step': 2161, 'epoch': 1} +{'type': 'loss', 'content': 0.061893828213214874, 'timestamp': '2025-09-10 02:40:55.574466', 'step': 2162, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:40:55.627299', 'step': 2162, 'epoch': 1} +{'type': 'loss', 'content': 0.015684375539422035, 'timestamp': '2025-09-10 02:40:55.629443', 'step': 2163, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-09-10 02:40:55.690463', 'step': 2163, 'epoch': 1} +{'type': 'loss', 'content': 0.014504256658256054, 'timestamp': '2025-09-10 02:40:55.702200', 'step': 2164, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:40:55.754233', 'step': 2164, 'epoch': 1} +{'type': 'loss', 'content': 0.016790887340903282, 'timestamp': '2025-09-10 02:40:55.757285', 'step': 2165, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:40:55.809685', 'step': 2165, 'epoch': 1} +{'type': 'loss', 'content': 0.0051002162508666515, 'timestamp': '2025-09-10 02:40:55.816382', 'step': 2166, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:40:55.869298', 'step': 2166, 'epoch': 1} +{'type': 'loss', 'content': 0.0017120791599154472, 'timestamp': '2025-09-10 02:40:55.875886', 'step': 2167, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:40:55.929927', 'step': 2167, 'epoch': 1} +{'type': 'loss', 'content': 0.002449542749673128, 'timestamp': '2025-09-10 02:40:55.936920', 'step': 2168, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:40:55.988903', 'step': 2168, 'epoch': 1} +{'type': 'loss', 'content': 0.0005112888175062835, 'timestamp': '2025-09-10 02:40:55.991111', 'step': 2169, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:40:56.043516', 'step': 2169, 'epoch': 1} +{'type': 'loss', 'content': 0.00936068408191204, 'timestamp': '2025-09-10 02:40:56.045789', 'step': 2170, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:40:56.098994', 'step': 2170, 'epoch': 1} +{'type': 'loss', 'content': 0.004754960536956787, 'timestamp': '2025-09-10 02:40:56.105538', 'step': 2171, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:40:56.158525', 'step': 2171, 'epoch': 1} +{'type': 'loss', 'content': 0.02954014204442501, 'timestamp': '2025-09-10 02:40:56.164206', 'step': 2172, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:40:56.217165', 'step': 2172, 'epoch': 1} +{'type': 'loss', 'content': 0.026429308578372, 'timestamp': '2025-09-10 02:40:56.219295', 'step': 2173, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:40:56.272322', 'step': 2173, 'epoch': 1} +{'type': 'loss', 'content': 0.002102070953696966, 'timestamp': '2025-09-10 02:40:56.274593', 'step': 2174, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:40:56.327767', 'step': 2174, 'epoch': 1} +{'type': 'loss', 'content': 0.017273031175136566, 'timestamp': '2025-09-10 02:40:56.336133', 'step': 2175, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:40:56.391343', 'step': 2175, 'epoch': 1} +{'type': 'loss', 'content': 0.03424067422747612, 'timestamp': '2025-09-10 02:40:56.401921', 'step': 2176, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:40:56.454836', 'step': 2176, 'epoch': 1} +{'type': 'loss', 'content': 0.033699143677949905, 'timestamp': '2025-09-10 02:40:56.456892', 'step': 2177, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:40:56.509941', 'step': 2177, 'epoch': 1} +{'type': 'loss', 'content': 0.013751120306551456, 'timestamp': '2025-09-10 02:40:56.512183', 'step': 2178, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:40:56.564976', 'step': 2178, 'epoch': 1} +{'type': 'loss', 'content': 0.015615619719028473, 'timestamp': '2025-09-10 02:40:56.567043', 'step': 2179, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:40:56.624877', 'step': 2179, 'epoch': 1} +{'type': 'loss', 'content': 0.020907536149024963, 'timestamp': '2025-09-10 02:40:56.636115', 'step': 2180, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:40:56.689074', 'step': 2180, 'epoch': 1} +{'type': 'loss', 'content': 0.00806462112814188, 'timestamp': '2025-09-10 02:40:56.691226', 'step': 2181, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:40:56.744060', 'step': 2181, 'epoch': 1} +{'type': 'loss', 'content': 0.006408077199012041, 'timestamp': '2025-09-10 02:40:56.747221', 'step': 2182, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:40:56.800294', 'step': 2182, 'epoch': 1} +{'type': 'loss', 'content': 0.04697560518980026, 'timestamp': '2025-09-10 02:40:56.802424', 'step': 2183, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:40:56.855341', 'step': 2183, 'epoch': 1} +{'type': 'loss', 'content': 0.03236076980829239, 'timestamp': '2025-09-10 02:40:56.862648', 'step': 2184, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:40:56.915124', 'step': 2184, 'epoch': 1} +{'type': 'loss', 'content': 0.021621523424983025, 'timestamp': '2025-09-10 02:40:56.917396', 'step': 2185, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:40:56.969915', 'step': 2185, 'epoch': 1} +{'type': 'loss', 'content': 0.009808598086237907, 'timestamp': '2025-09-10 02:40:56.972331', 'step': 2186, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-09-10 02:40:57.033878', 'step': 2186, 'epoch': 1} +{'type': 'loss', 'content': 0.01728159561753273, 'timestamp': '2025-09-10 02:40:57.044793', 'step': 2187, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:40:57.097474', 'step': 2187, 'epoch': 1} +{'type': 'loss', 'content': 0.021557852625846863, 'timestamp': '2025-09-10 02:40:57.103062', 'step': 2188, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:40:57.155242', 'step': 2188, 'epoch': 1} +{'type': 'loss', 'content': 0.024152925238013268, 'timestamp': '2025-09-10 02:40:57.157486', 'step': 2189, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:40:57.210112', 'step': 2189, 'epoch': 1} +{'type': 'loss', 'content': 0.013515089638531208, 'timestamp': '2025-09-10 02:40:57.213394', 'step': 2190, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 464], 'flops': 9280056402752.0}, 'timestamp': '2025-09-10 02:40:57.285934', 'step': 2190, 'epoch': 1} +{'type': 'loss', 'content': 0.019097929820418358, 'timestamp': '2025-09-10 02:40:57.299450', 'step': 2191, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:40:57.352243', 'step': 2191, 'epoch': 1} +{'type': 'loss', 'content': 0.00467953085899353, 'timestamp': '2025-09-10 02:40:57.357916', 'step': 2192, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:40:57.410380', 'step': 2192, 'epoch': 1} +{'type': 'loss', 'content': 0.0023125980515033007, 'timestamp': '2025-09-10 02:40:57.412667', 'step': 2193, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:40:57.467289', 'step': 2193, 'epoch': 1} +{'type': 'loss', 'content': 0.004761739633977413, 'timestamp': '2025-09-10 02:40:57.477059', 'step': 2194, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:40:57.530097', 'step': 2194, 'epoch': 1} +{'type': 'loss', 'content': 0.0024532435927540064, 'timestamp': '2025-09-10 02:40:57.532371', 'step': 2195, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:40:57.585582', 'step': 2195, 'epoch': 1} +{'type': 'loss', 'content': 0.010808519087731838, 'timestamp': '2025-09-10 02:40:57.594656', 'step': 2196, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:40:57.646716', 'step': 2196, 'epoch': 1} +{'type': 'loss', 'content': 0.01729532890021801, 'timestamp': '2025-09-10 02:40:57.648816', 'step': 2197, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:40:57.700956', 'step': 2197, 'epoch': 1} +{'type': 'loss', 'content': 0.00806347094476223, 'timestamp': '2025-09-10 02:40:57.702942', 'step': 2198, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:40:57.755078', 'step': 2198, 'epoch': 1} +{'type': 'loss', 'content': 0.012199915014207363, 'timestamp': '2025-09-10 02:40:57.757256', 'step': 2199, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:40:57.809991', 'step': 2199, 'epoch': 1} +{'type': 'loss', 'content': 0.006157182157039642, 'timestamp': '2025-09-10 02:40:57.815658', 'step': 2200, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:40:57.867886', 'step': 2200, 'epoch': 1} +{'type': 'loss', 'content': 0.016080476343631744, 'timestamp': '2025-09-10 02:40:57.870082', 'step': 2201, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:40:57.922420', 'step': 2201, 'epoch': 1} +{'type': 'loss', 'content': 0.014938508160412312, 'timestamp': '2025-09-10 02:40:57.925617', 'step': 2202, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:40:57.978098', 'step': 2202, 'epoch': 1} +{'type': 'loss', 'content': 0.026552477851510048, 'timestamp': '2025-09-10 02:40:57.980221', 'step': 2203, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:40:58.032527', 'step': 2203, 'epoch': 1} +{'type': 'loss', 'content': 0.020586589351296425, 'timestamp': '2025-09-10 02:40:58.038587', 'step': 2204, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:40:58.091055', 'step': 2204, 'epoch': 1} +{'type': 'loss', 'content': 0.0026182897854596376, 'timestamp': '2025-09-10 02:40:58.093263', 'step': 2205, 'epoch': 1} +{'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 736], 'batch_size': 8, 'flops': 14691612894976}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10859018244352}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 624], 'batch_size': 8, 'flops': 12455932682112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11497784019456}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8623338031488}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8623338031488}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10859018244352}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 656], 'batch_size': 8, 'flops': 13094698457216}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 560], 'batch_size': 8, 'flops': 11178401131904}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11497784019456}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 1168], 'batch_size': 8, 'flops': 23314950858880}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 640], 'batch_size': 8, 'flops': 12775315569664}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [6, 208], 'batch_size': 8, 'flops': 4151977605760}], 'timestamp': '2025-09-10 02:41:14.861620', 'step': 2205, 'epoch': 1} +{'type': 'pplx', 'content': 24177451.08196949, 'timestamp': '2025-09-10 02:41:14.864180', 'step': 2205, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:41:14.918501', 'step': 2205, 'epoch': 1} +{'type': 'loss', 'content': 0.02545582689344883, 'timestamp': '2025-09-10 02:41:14.925720', 'step': 2206, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:41:14.979257', 'step': 2206, 'epoch': 1} +{'type': 'loss', 'content': 0.005285558756440878, 'timestamp': '2025-09-10 02:41:14.981182', 'step': 2207, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:41:15.034496', 'step': 2207, 'epoch': 1} +{'type': 'loss', 'content': 0.018531398847699165, 'timestamp': '2025-09-10 02:41:15.041502', 'step': 2208, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:41:15.093854', 'step': 2208, 'epoch': 1} +{'type': 'loss', 'content': 0.01486459281295538, 'timestamp': '2025-09-10 02:41:15.095964', 'step': 2209, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:41:15.149956', 'step': 2209, 'epoch': 1} +{'type': 'loss', 'content': 0.00933418981730938, 'timestamp': '2025-09-10 02:41:15.152241', 'step': 2210, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:41:15.206019', 'step': 2210, 'epoch': 1} +{'type': 'loss', 'content': 0.01151957642287016, 'timestamp': '2025-09-10 02:41:15.208363', 'step': 2211, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:41:15.261884', 'step': 2211, 'epoch': 1} +{'type': 'loss', 'content': 0.006928191985934973, 'timestamp': '2025-09-10 02:41:15.268026', 'step': 2212, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:41:15.320551', 'step': 2212, 'epoch': 1} +{'type': 'loss', 'content': 0.004967629909515381, 'timestamp': '2025-09-10 02:41:15.322497', 'step': 2213, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:41:15.375229', 'step': 2213, 'epoch': 1} +{'type': 'loss', 'content': 0.0027123447507619858, 'timestamp': '2025-09-10 02:41:15.377353', 'step': 2214, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:41:15.430497', 'step': 2214, 'epoch': 1} +{'type': 'loss', 'content': 0.027462434023618698, 'timestamp': '2025-09-10 02:41:15.438472', 'step': 2215, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:41:15.491111', 'step': 2215, 'epoch': 1} +{'type': 'loss', 'content': 0.02530898153781891, 'timestamp': '2025-09-10 02:41:15.496994', 'step': 2216, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:41:15.549845', 'step': 2216, 'epoch': 1} +{'type': 'loss', 'content': 0.029349375516176224, 'timestamp': '2025-09-10 02:41:15.552603', 'step': 2217, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:41:15.605607', 'step': 2217, 'epoch': 1} +{'type': 'loss', 'content': 0.01407963689416647, 'timestamp': '2025-09-10 02:41:15.607514', 'step': 2218, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:41:15.660355', 'step': 2218, 'epoch': 1} +{'type': 'loss', 'content': 0.014234866015613079, 'timestamp': '2025-09-10 02:41:15.662322', 'step': 2219, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:41:15.715788', 'step': 2219, 'epoch': 1} +{'type': 'loss', 'content': 0.014596564695239067, 'timestamp': '2025-09-10 02:41:15.726161', 'step': 2220, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:41:15.782530', 'step': 2220, 'epoch': 1} +{'type': 'loss', 'content': 0.01654386892914772, 'timestamp': '2025-09-10 02:41:15.785840', 'step': 2221, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:41:15.839424', 'step': 2221, 'epoch': 1} +{'type': 'loss', 'content': 0.00550526799634099, 'timestamp': '2025-09-10 02:41:15.841382', 'step': 2222, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:41:15.895205', 'step': 2222, 'epoch': 1} +{'type': 'loss', 'content': 0.002153964713215828, 'timestamp': '2025-09-10 02:41:15.897507', 'step': 2223, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:41:15.951316', 'step': 2223, 'epoch': 1} +{'type': 'loss', 'content': 0.008730700239539146, 'timestamp': '2025-09-10 02:41:15.958134', 'step': 2224, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:41:16.010643', 'step': 2224, 'epoch': 1} +{'type': 'loss', 'content': 0.006057410035282373, 'timestamp': '2025-09-10 02:41:16.017053', 'step': 2225, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:41:16.070410', 'step': 2225, 'epoch': 1} +{'type': 'loss', 'content': 0.018048031255602837, 'timestamp': '2025-09-10 02:41:16.080056', 'step': 2226, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:41:16.133055', 'step': 2226, 'epoch': 1} +{'type': 'loss', 'content': 0.009440673515200615, 'timestamp': '2025-09-10 02:41:16.135249', 'step': 2227, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:41:16.188960', 'step': 2227, 'epoch': 1} +{'type': 'loss', 'content': 0.001368698780424893, 'timestamp': '2025-09-10 02:41:16.194938', 'step': 2228, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:41:16.247574', 'step': 2228, 'epoch': 1} +{'type': 'loss', 'content': 0.014728554524481297, 'timestamp': '2025-09-10 02:41:16.249578', 'step': 2229, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:41:16.302032', 'step': 2229, 'epoch': 1} +{'type': 'loss', 'content': 0.011189117096364498, 'timestamp': '2025-09-10 02:41:16.304251', 'step': 2230, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 480], 'flops': 9600058345344.0}, 'timestamp': '2025-09-10 02:41:16.377366', 'step': 2230, 'epoch': 1} +{'type': 'loss', 'content': 0.016204727813601494, 'timestamp': '2025-09-10 02:41:16.391071', 'step': 2231, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:41:16.444465', 'step': 2231, 'epoch': 1} +{'type': 'loss', 'content': 0.01046925038099289, 'timestamp': '2025-09-10 02:41:16.451792', 'step': 2232, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:41:16.504617', 'step': 2232, 'epoch': 1} +{'type': 'loss', 'content': 0.012259201146662235, 'timestamp': '2025-09-10 02:41:16.507474', 'step': 2233, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:41:16.562099', 'step': 2233, 'epoch': 1} +{'type': 'loss', 'content': 0.002176963957026601, 'timestamp': '2025-09-10 02:41:16.564093', 'step': 2234, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:41:16.617245', 'step': 2234, 'epoch': 1} +{'type': 'loss', 'content': 0.011239242739975452, 'timestamp': '2025-09-10 02:41:16.619515', 'step': 2235, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-09-10 02:41:16.687523', 'step': 2235, 'epoch': 1} +{'type': 'loss', 'content': 0.0023203168530017138, 'timestamp': '2025-09-10 02:41:16.700911', 'step': 2236, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:41:16.753839', 'step': 2236, 'epoch': 1} +{'type': 'loss', 'content': 0.0014164808671921492, 'timestamp': '2025-09-10 02:41:16.756042', 'step': 2237, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:41:16.808492', 'step': 2237, 'epoch': 1} +{'type': 'loss', 'content': 0.022817770019173622, 'timestamp': '2025-09-10 02:41:16.810522', 'step': 2238, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:41:16.862688', 'step': 2238, 'epoch': 1} +{'type': 'loss', 'content': 0.026868876069784164, 'timestamp': '2025-09-10 02:41:16.865808', 'step': 2239, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:41:16.919119', 'step': 2239, 'epoch': 1} +{'type': 'loss', 'content': 0.017978010699152946, 'timestamp': '2025-09-10 02:41:16.927878', 'step': 2240, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:41:16.981530', 'step': 2240, 'epoch': 1} +{'type': 'loss', 'content': 0.006993381772190332, 'timestamp': '2025-09-10 02:41:16.983675', 'step': 2241, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:41:17.042397', 'step': 2241, 'epoch': 1} +{'type': 'loss', 'content': 0.012187718413770199, 'timestamp': '2025-09-10 02:41:17.052816', 'step': 2242, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:41:17.106811', 'step': 2242, 'epoch': 1} +{'type': 'loss', 'content': 0.03987843915820122, 'timestamp': '2025-09-10 02:41:17.108917', 'step': 2243, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:41:17.163179', 'step': 2243, 'epoch': 1} +{'type': 'loss', 'content': 0.01238629873842001, 'timestamp': '2025-09-10 02:41:17.170236', 'step': 2244, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:41:17.224617', 'step': 2244, 'epoch': 1} +{'type': 'loss', 'content': 0.005961086135357618, 'timestamp': '2025-09-10 02:41:17.227023', 'step': 2245, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 496], 'flops': 9920060287936.0}, 'timestamp': '2025-09-10 02:41:17.302355', 'step': 2245, 'epoch': 1} +{'type': 'loss', 'content': 0.01882362924516201, 'timestamp': '2025-09-10 02:41:17.316295', 'step': 2246, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:41:17.369839', 'step': 2246, 'epoch': 1} +{'type': 'loss', 'content': 0.00610742811113596, 'timestamp': '2025-09-10 02:41:17.376321', 'step': 2247, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:41:17.429804', 'step': 2247, 'epoch': 1} +{'type': 'loss', 'content': 0.004611399490386248, 'timestamp': '2025-09-10 02:41:17.437053', 'step': 2248, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:41:17.489471', 'step': 2248, 'epoch': 1} +{'type': 'loss', 'content': 0.004369756672531366, 'timestamp': '2025-09-10 02:41:17.491843', 'step': 2249, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:41:17.544442', 'step': 2249, 'epoch': 1} +{'type': 'loss', 'content': 0.019328685477375984, 'timestamp': '2025-09-10 02:41:17.546834', 'step': 2250, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:41:17.599326', 'step': 2250, 'epoch': 1} +{'type': 'loss', 'content': 0.0016948387492448092, 'timestamp': '2025-09-10 02:41:17.602584', 'step': 2251, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-09-10 02:41:17.670376', 'step': 2251, 'epoch': 1} +{'type': 'loss', 'content': 0.0033548062201589346, 'timestamp': '2025-09-10 02:41:17.683726', 'step': 2252, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:41:17.741857', 'step': 2252, 'epoch': 1} +{'type': 'loss', 'content': 0.003852900117635727, 'timestamp': '2025-09-10 02:41:17.748685', 'step': 2253, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:41:17.808310', 'step': 2253, 'epoch': 1} +{'type': 'loss', 'content': 0.0017933115595951676, 'timestamp': '2025-09-10 02:41:17.811884', 'step': 2254, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:41:17.869147', 'step': 2254, 'epoch': 1} +{'type': 'loss', 'content': 0.010794803500175476, 'timestamp': '2025-09-10 02:41:17.873143', 'step': 2255, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:41:17.928256', 'step': 2255, 'epoch': 1} +{'type': 'loss', 'content': 0.01731909066438675, 'timestamp': '2025-09-10 02:41:17.934184', 'step': 2256, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-09-10 02:41:17.995399', 'step': 2256, 'epoch': 1} +{'type': 'loss', 'content': 0.01035644207149744, 'timestamp': '2025-09-10 02:41:18.007186', 'step': 2257, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:41:18.062789', 'step': 2257, 'epoch': 1} +{'type': 'loss', 'content': 0.019283056259155273, 'timestamp': '2025-09-10 02:41:18.064940', 'step': 2258, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:41:18.120668', 'step': 2258, 'epoch': 1} +{'type': 'loss', 'content': 0.010629468597471714, 'timestamp': '2025-09-10 02:41:18.123305', 'step': 2259, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:41:18.176668', 'step': 2259, 'epoch': 1} +{'type': 'loss', 'content': 0.013493156991899014, 'timestamp': '2025-09-10 02:41:18.182843', 'step': 2260, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:41:18.236818', 'step': 2260, 'epoch': 1} +{'type': 'loss', 'content': 0.011419164016842842, 'timestamp': '2025-09-10 02:41:18.243436', 'step': 2261, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:41:18.300132', 'step': 2261, 'epoch': 1} +{'type': 'loss', 'content': 0.020291399210691452, 'timestamp': '2025-09-10 02:41:18.302517', 'step': 2262, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-09-10 02:41:18.375160', 'step': 2262, 'epoch': 1} +{'type': 'loss', 'content': 0.003984835464507341, 'timestamp': '2025-09-10 02:41:18.387351', 'step': 2263, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:41:18.442897', 'step': 2263, 'epoch': 1} +{'type': 'loss', 'content': 0.0030595448333770037, 'timestamp': '2025-09-10 02:41:18.449228', 'step': 2264, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:41:18.506257', 'step': 2264, 'epoch': 1} +{'type': 'loss', 'content': 0.006595213431864977, 'timestamp': '2025-09-10 02:41:18.513844', 'step': 2265, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:41:18.578026', 'step': 2265, 'epoch': 1} +{'type': 'loss', 'content': 0.022331099957227707, 'timestamp': '2025-09-10 02:41:18.588419', 'step': 2266, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:41:18.643790', 'step': 2266, 'epoch': 1} +{'type': 'loss', 'content': 0.0034477750305086374, 'timestamp': '2025-09-10 02:41:18.653342', 'step': 2267, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:41:18.706215', 'step': 2267, 'epoch': 1} +{'type': 'loss', 'content': 0.02623889409005642, 'timestamp': '2025-09-10 02:41:18.712194', 'step': 2268, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:41:18.764351', 'step': 2268, 'epoch': 1} +{'type': 'loss', 'content': 0.026746975257992744, 'timestamp': '2025-09-10 02:41:18.774648', 'step': 2269, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:41:18.828138', 'step': 2269, 'epoch': 1} +{'type': 'loss', 'content': 0.01220802403986454, 'timestamp': '2025-09-10 02:41:18.830280', 'step': 2270, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:41:18.888155', 'step': 2270, 'epoch': 1} +{'type': 'loss', 'content': 0.0011245275381952524, 'timestamp': '2025-09-10 02:41:18.898608', 'step': 2271, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:41:18.953462', 'step': 2271, 'epoch': 1} +{'type': 'loss', 'content': 0.003515868680551648, 'timestamp': '2025-09-10 02:41:18.964064', 'step': 2272, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:41:19.016398', 'step': 2272, 'epoch': 1} +{'type': 'loss', 'content': 0.018965017050504684, 'timestamp': '2025-09-10 02:41:19.018443', 'step': 2273, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:41:19.071015', 'step': 2273, 'epoch': 1} +{'type': 'loss', 'content': 0.0020753650460392237, 'timestamp': '2025-09-10 02:41:19.073122', 'step': 2274, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:41:19.125775', 'step': 2274, 'epoch': 1} +{'type': 'loss', 'content': 0.002205053111538291, 'timestamp': '2025-09-10 02:41:19.127950', 'step': 2275, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:41:19.180845', 'step': 2275, 'epoch': 1} +{'type': 'loss', 'content': 0.006280322093516588, 'timestamp': '2025-09-10 02:41:19.186799', 'step': 2276, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:41:19.239786', 'step': 2276, 'epoch': 1} +{'type': 'loss', 'content': 0.013156109489500523, 'timestamp': '2025-09-10 02:41:19.241832', 'step': 2277, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:41:19.295259', 'step': 2277, 'epoch': 1} +{'type': 'loss', 'content': 0.023661980405449867, 'timestamp': '2025-09-10 02:41:19.297267', 'step': 2278, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:41:19.350321', 'step': 2278, 'epoch': 1} +{'type': 'loss', 'content': 0.0168520025908947, 'timestamp': '2025-09-10 02:41:19.352454', 'step': 2279, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:41:19.406007', 'step': 2279, 'epoch': 1} +{'type': 'loss', 'content': 0.001595528912730515, 'timestamp': '2025-09-10 02:41:19.412906', 'step': 2280, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:41:19.465162', 'step': 2280, 'epoch': 1} +{'type': 'loss', 'content': 0.002066161250695586, 'timestamp': '2025-09-10 02:41:19.467152', 'step': 2281, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-09-10 02:41:19.528441', 'step': 2281, 'epoch': 1} +{'type': 'loss', 'content': 0.010385619476437569, 'timestamp': '2025-09-10 02:41:19.539534', 'step': 2282, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-09-10 02:41:19.600765', 'step': 2282, 'epoch': 1} +{'type': 'loss', 'content': 0.009713256731629372, 'timestamp': '2025-09-10 02:41:19.611709', 'step': 2283, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:41:19.664566', 'step': 2283, 'epoch': 1} +{'type': 'loss', 'content': 0.001635165186598897, 'timestamp': '2025-09-10 02:41:19.670128', 'step': 2284, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-09-10 02:41:19.729270', 'step': 2284, 'epoch': 1} +{'type': 'loss', 'content': 0.004750643391162157, 'timestamp': '2025-09-10 02:41:19.741035', 'step': 2285, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:41:19.794319', 'step': 2285, 'epoch': 1} +{'type': 'loss', 'content': 0.03609858825802803, 'timestamp': '2025-09-10 02:41:19.796548', 'step': 2286, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:41:19.849486', 'step': 2286, 'epoch': 1} +{'type': 'loss', 'content': 0.013071305118501186, 'timestamp': '2025-09-10 02:41:19.852437', 'step': 2287, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:41:19.905906', 'step': 2287, 'epoch': 1} +{'type': 'loss', 'content': 0.0013990921434015036, 'timestamp': '2025-09-10 02:41:19.911706', 'step': 2288, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:41:19.963843', 'step': 2288, 'epoch': 1} +{'type': 'loss', 'content': 0.037052638828754425, 'timestamp': '2025-09-10 02:41:19.966099', 'step': 2289, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:41:20.019295', 'step': 2289, 'epoch': 1} +{'type': 'loss', 'content': 0.020622368901968002, 'timestamp': '2025-09-10 02:41:20.021174', 'step': 2290, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:41:20.075321', 'step': 2290, 'epoch': 1} +{'type': 'loss', 'content': 0.0023187126498669386, 'timestamp': '2025-09-10 02:41:20.085156', 'step': 2291, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:41:20.138387', 'step': 2291, 'epoch': 1} +{'type': 'loss', 'content': 0.0026464611291885376, 'timestamp': '2025-09-10 02:41:20.144565', 'step': 2292, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:41:20.197490', 'step': 2292, 'epoch': 1} +{'type': 'loss', 'content': 0.0021855831146240234, 'timestamp': '2025-09-10 02:41:20.199834', 'step': 2293, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:41:20.252235', 'step': 2293, 'epoch': 1} +{'type': 'loss', 'content': 0.020060395821928978, 'timestamp': '2025-09-10 02:41:20.258773', 'step': 2294, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:41:20.311938', 'step': 2294, 'epoch': 1} +{'type': 'loss', 'content': 0.002736916532739997, 'timestamp': '2025-09-10 02:41:20.318564', 'step': 2295, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:41:20.371618', 'step': 2295, 'epoch': 1} +{'type': 'loss', 'content': 0.0022916479501873255, 'timestamp': '2025-09-10 02:41:20.377491', 'step': 2296, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:41:20.429420', 'step': 2296, 'epoch': 1} +{'type': 'loss', 'content': 0.012775218114256859, 'timestamp': '2025-09-10 02:41:20.431779', 'step': 2297, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:41:20.484961', 'step': 2297, 'epoch': 1} +{'type': 'loss', 'content': 0.024944672361016273, 'timestamp': '2025-09-10 02:41:20.487135', 'step': 2298, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:41:20.540107', 'step': 2298, 'epoch': 1} +{'type': 'loss', 'content': 0.004370290320366621, 'timestamp': '2025-09-10 02:41:20.542171', 'step': 2299, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:41:20.595338', 'step': 2299, 'epoch': 1} +{'type': 'loss', 'content': 0.006103006657212973, 'timestamp': '2025-09-10 02:41:20.604304', 'step': 2300, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:41:20.657144', 'step': 2300, 'epoch': 1} +{'type': 'loss', 'content': 0.009504346176981926, 'timestamp': '2025-09-10 02:41:20.659242', 'step': 2301, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:41:20.712311', 'step': 2301, 'epoch': 1} +{'type': 'loss', 'content': 0.0023260193411260843, 'timestamp': '2025-09-10 02:41:20.714627', 'step': 2302, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:41:20.768174', 'step': 2302, 'epoch': 1} +{'type': 'loss', 'content': 0.003965396899729967, 'timestamp': '2025-09-10 02:41:20.776187', 'step': 2303, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:41:20.834491', 'step': 2303, 'epoch': 1} +{'type': 'loss', 'content': 0.007237962447106838, 'timestamp': '2025-09-10 02:41:20.845731', 'step': 2304, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:41:20.898655', 'step': 2304, 'epoch': 1} +{'type': 'loss', 'content': 0.013981464318931103, 'timestamp': '2025-09-10 02:41:20.900768', 'step': 2305, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-09-10 02:41:20.967264', 'step': 2305, 'epoch': 1} +{'type': 'loss', 'content': 0.008471324108541012, 'timestamp': '2025-09-10 02:41:20.979513', 'step': 2306, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:41:21.032229', 'step': 2306, 'epoch': 1} +{'type': 'loss', 'content': 0.007229648530483246, 'timestamp': '2025-09-10 02:41:21.035154', 'step': 2307, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 480], 'flops': 9600058345344.0}, 'timestamp': '2025-09-10 02:41:21.108651', 'step': 2307, 'epoch': 1} +{'type': 'loss', 'content': 0.023853302001953125, 'timestamp': '2025-09-10 02:41:21.123137', 'step': 2308, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:41:21.175783', 'step': 2308, 'epoch': 1} +{'type': 'loss', 'content': 0.012786195613443851, 'timestamp': '2025-09-10 02:41:21.178011', 'step': 2309, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:41:21.231153', 'step': 2309, 'epoch': 1} +{'type': 'loss', 'content': 0.02114769257605076, 'timestamp': '2025-09-10 02:41:21.233243', 'step': 2310, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:41:21.286425', 'step': 2310, 'epoch': 1} +{'type': 'loss', 'content': 0.04166872054338455, 'timestamp': '2025-09-10 02:41:21.288590', 'step': 2311, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:41:21.341390', 'step': 2311, 'epoch': 1} +{'type': 'loss', 'content': 0.01378029864281416, 'timestamp': '2025-09-10 02:41:21.348605', 'step': 2312, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:41:21.401840', 'step': 2312, 'epoch': 1} +{'type': 'loss', 'content': 0.00712332921102643, 'timestamp': '2025-09-10 02:41:21.403853', 'step': 2313, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:41:21.456243', 'step': 2313, 'epoch': 1} +{'type': 'loss', 'content': 0.013273650780320168, 'timestamp': '2025-09-10 02:41:21.464496', 'step': 2314, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:41:21.517483', 'step': 2314, 'epoch': 1} +{'type': 'loss', 'content': 0.012544920668005943, 'timestamp': '2025-09-10 02:41:21.524028', 'step': 2315, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:41:21.577238', 'step': 2315, 'epoch': 1} +{'type': 'loss', 'content': 0.002789823804050684, 'timestamp': '2025-09-10 02:41:21.583022', 'step': 2316, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:41:21.635007', 'step': 2316, 'epoch': 1} +{'type': 'loss', 'content': 0.005540736485272646, 'timestamp': '2025-09-10 02:41:21.637245', 'step': 2317, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:41:21.690185', 'step': 2317, 'epoch': 1} +{'type': 'loss', 'content': 0.007694281172007322, 'timestamp': '2025-09-10 02:41:21.692272', 'step': 2318, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:41:21.746795', 'step': 2318, 'epoch': 1} +{'type': 'loss', 'content': 0.013156984932720661, 'timestamp': '2025-09-10 02:41:21.756619', 'step': 2319, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:41:21.809346', 'step': 2319, 'epoch': 1} +{'type': 'loss', 'content': 0.0038832908030599356, 'timestamp': '2025-09-10 02:41:21.815060', 'step': 2320, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:41:21.867280', 'step': 2320, 'epoch': 1} +{'type': 'loss', 'content': 0.027881622314453125, 'timestamp': '2025-09-10 02:41:21.869552', 'step': 2321, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:41:21.929849', 'step': 2321, 'epoch': 1} +{'type': 'loss', 'content': 0.02795393206179142, 'timestamp': '2025-09-10 02:41:21.940547', 'step': 2322, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:41:22.000946', 'step': 2322, 'epoch': 1} +{'type': 'loss', 'content': 0.004871138371527195, 'timestamp': '2025-09-10 02:41:22.011684', 'step': 2323, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:41:22.064313', 'step': 2323, 'epoch': 1} +{'type': 'loss', 'content': 0.0025887552183121443, 'timestamp': '2025-09-10 02:41:22.070612', 'step': 2324, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:41:22.123054', 'step': 2324, 'epoch': 1} +{'type': 'loss', 'content': 0.0193661879748106, 'timestamp': '2025-09-10 02:41:22.131332', 'step': 2325, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:41:22.184154', 'step': 2325, 'epoch': 1} +{'type': 'loss', 'content': 0.001558991032652557, 'timestamp': '2025-09-10 02:41:22.190776', 'step': 2326, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-09-10 02:41:22.252546', 'step': 2326, 'epoch': 1} +{'type': 'loss', 'content': 0.011013707146048546, 'timestamp': '2025-09-10 02:41:22.263674', 'step': 2327, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 464], 'flops': 9280056402752.0}, 'timestamp': '2025-09-10 02:41:22.336472', 'step': 2327, 'epoch': 1} +{'type': 'loss', 'content': 0.006773136556148529, 'timestamp': '2025-09-10 02:41:22.350749', 'step': 2328, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:41:22.403496', 'step': 2328, 'epoch': 1} +{'type': 'loss', 'content': 0.003129596123471856, 'timestamp': '2025-09-10 02:41:22.405834', 'step': 2329, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:41:22.458464', 'step': 2329, 'epoch': 1} +{'type': 'loss', 'content': 0.017954537644982338, 'timestamp': '2025-09-10 02:41:22.460523', 'step': 2330, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:41:22.518551', 'step': 2330, 'epoch': 1} +{'type': 'loss', 'content': 0.004090786445885897, 'timestamp': '2025-09-10 02:41:22.528966', 'step': 2331, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:41:22.585169', 'step': 2331, 'epoch': 1} +{'type': 'loss', 'content': 0.008648094721138477, 'timestamp': '2025-09-10 02:41:22.592305', 'step': 2332, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:41:22.645362', 'step': 2332, 'epoch': 1} +{'type': 'loss', 'content': 0.01296969223767519, 'timestamp': '2025-09-10 02:41:22.647523', 'step': 2333, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:41:22.701127', 'step': 2333, 'epoch': 1} +{'type': 'loss', 'content': 0.012112529948353767, 'timestamp': '2025-09-10 02:41:22.710726', 'step': 2334, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:41:22.763657', 'step': 2334, 'epoch': 1} +{'type': 'loss', 'content': 0.004430691245943308, 'timestamp': '2025-09-10 02:41:22.765892', 'step': 2335, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:41:22.819069', 'step': 2335, 'epoch': 1} +{'type': 'loss', 'content': 0.01480043400079012, 'timestamp': '2025-09-10 02:41:22.824959', 'step': 2336, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:41:22.877693', 'step': 2336, 'epoch': 1} +{'type': 'loss', 'content': 0.0011013038456439972, 'timestamp': '2025-09-10 02:41:22.879858', 'step': 2337, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 464], 'flops': 9280056402752.0}, 'timestamp': '2025-09-10 02:41:22.952336', 'step': 2337, 'epoch': 1} +{'type': 'loss', 'content': 0.0402669832110405, 'timestamp': '2025-09-10 02:41:22.965808', 'step': 2338, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:41:23.019273', 'step': 2338, 'epoch': 1} +{'type': 'loss', 'content': 0.002841313136741519, 'timestamp': '2025-09-10 02:41:23.021275', 'step': 2339, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:41:23.074502', 'step': 2339, 'epoch': 1} +{'type': 'loss', 'content': 0.008241831324994564, 'timestamp': '2025-09-10 02:41:23.080247', 'step': 2340, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:41:23.136588', 'step': 2340, 'epoch': 1} +{'type': 'loss', 'content': 0.03805210813879967, 'timestamp': '2025-09-10 02:41:23.147809', 'step': 2341, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 448], 'flops': 8960054460160.0}, 'timestamp': '2025-09-10 02:41:23.218593', 'step': 2341, 'epoch': 1} +{'type': 'loss', 'content': 0.00663991691544652, 'timestamp': '2025-09-10 02:41:23.231512', 'step': 2342, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:41:23.285044', 'step': 2342, 'epoch': 1} +{'type': 'loss', 'content': 0.011308408342301846, 'timestamp': '2025-09-10 02:41:23.287758', 'step': 2343, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:41:23.340550', 'step': 2343, 'epoch': 1} +{'type': 'loss', 'content': 0.005613468121737242, 'timestamp': '2025-09-10 02:41:23.346532', 'step': 2344, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:41:23.398457', 'step': 2344, 'epoch': 1} +{'type': 'loss', 'content': 0.019076507538557053, 'timestamp': '2025-09-10 02:41:23.401404', 'step': 2345, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:41:23.454352', 'step': 2345, 'epoch': 1} +{'type': 'loss', 'content': 0.015190249308943748, 'timestamp': '2025-09-10 02:41:23.456783', 'step': 2346, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:41:23.509982', 'step': 2346, 'epoch': 1} +{'type': 'loss', 'content': 0.03243137523531914, 'timestamp': '2025-09-10 02:41:23.512181', 'step': 2347, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:41:23.566203', 'step': 2347, 'epoch': 1} +{'type': 'loss', 'content': 0.0007310754735954106, 'timestamp': '2025-09-10 02:41:23.575244', 'step': 2348, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:41:23.627876', 'step': 2348, 'epoch': 1} +{'type': 'loss', 'content': 0.0011788978008553386, 'timestamp': '2025-09-10 02:41:23.629824', 'step': 2349, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:41:23.689973', 'step': 2349, 'epoch': 1} +{'type': 'loss', 'content': 0.024757685139775276, 'timestamp': '2025-09-10 02:41:23.700602', 'step': 2350, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:41:23.754312', 'step': 2350, 'epoch': 1} +{'type': 'loss', 'content': 0.0053327870555222034, 'timestamp': '2025-09-10 02:41:23.756596', 'step': 2351, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 656], 'flops': 13120079713856.0}, 'timestamp': '2025-09-10 02:41:23.852810', 'step': 2351, 'epoch': 1} +{'type': 'loss', 'content': 0.020339807495474815, 'timestamp': '2025-09-10 02:41:23.872109', 'step': 2352, 'epoch': 1} +{'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 736], 'batch_size': 8, 'flops': 14691612894976}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10859018244352}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 624], 'batch_size': 8, 'flops': 12455932682112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11497784019456}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8623338031488}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8623338031488}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10859018244352}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 656], 'batch_size': 8, 'flops': 13094698457216}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 560], 'batch_size': 8, 'flops': 11178401131904}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11497784019456}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 1168], 'batch_size': 8, 'flops': 23314950858880}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 640], 'batch_size': 8, 'flops': 12775315569664}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [6, 208], 'batch_size': 8, 'flops': 4151977605760}], 'timestamp': '2025-09-10 02:41:40.656522', 'step': 2352, 'epoch': 1} +{'type': 'pplx', 'content': 27640302.91922767, 'timestamp': '2025-09-10 02:41:40.659223', 'step': 2352, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:41:40.712789', 'step': 2352, 'epoch': 1} +{'type': 'loss', 'content': 0.013624520972371101, 'timestamp': '2025-09-10 02:41:40.717892', 'step': 2353, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:41:40.771968', 'step': 2353, 'epoch': 1} +{'type': 'loss', 'content': 0.0037309457547962666, 'timestamp': '2025-09-10 02:41:40.774045', 'step': 2354, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:41:40.827033', 'step': 2354, 'epoch': 1} +{'type': 'loss', 'content': 0.028440816327929497, 'timestamp': '2025-09-10 02:41:40.829076', 'step': 2355, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 528], 'flops': 10560064173120.0}, 'timestamp': '2025-09-10 02:41:40.908970', 'step': 2355, 'epoch': 1} +{'type': 'loss', 'content': 0.002316842321306467, 'timestamp': '2025-09-10 02:41:40.924787', 'step': 2356, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [3, 224], 'flops': 3360020475552.0}, 'timestamp': '2025-09-10 02:41:40.994475', 'step': 2356, 'epoch': 1} +{'type': 'loss', 'content': 0.00621901685371995, 'timestamp': '2025-09-10 02:41:40.996695', 'step': 2357, 'epoch': 1} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:41:41.053972', 'step': 2357, 'epoch': 2} +{'type': 'loss', 'content': 0.002619270933791995, 'timestamp': '2025-09-10 02:41:41.056175', 'step': 2358, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:41:41.109441', 'step': 2358, 'epoch': 2} +{'type': 'loss', 'content': 0.00830973032861948, 'timestamp': '2025-09-10 02:41:41.112235', 'step': 2359, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:41:41.165134', 'step': 2359, 'epoch': 2} +{'type': 'loss', 'content': 0.02099130116403103, 'timestamp': '2025-09-10 02:41:41.172128', 'step': 2360, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:41:41.225664', 'step': 2360, 'epoch': 2} +{'type': 'loss', 'content': 0.0021291126031428576, 'timestamp': '2025-09-10 02:41:41.227614', 'step': 2361, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:41:41.280742', 'step': 2361, 'epoch': 2} +{'type': 'loss', 'content': 0.000321090774377808, 'timestamp': '2025-09-10 02:41:41.283067', 'step': 2362, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:41:41.335560', 'step': 2362, 'epoch': 2} +{'type': 'loss', 'content': 0.012044147588312626, 'timestamp': '2025-09-10 02:41:41.337637', 'step': 2363, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:41:41.391314', 'step': 2363, 'epoch': 2} +{'type': 'loss', 'content': 0.04186839610338211, 'timestamp': '2025-09-10 02:41:41.397142', 'step': 2364, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:41:41.449450', 'step': 2364, 'epoch': 2} +{'type': 'loss', 'content': 0.03821462020277977, 'timestamp': '2025-09-10 02:41:41.451518', 'step': 2365, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:41:41.504028', 'step': 2365, 'epoch': 2} +{'type': 'loss', 'content': 0.02326563559472561, 'timestamp': '2025-09-10 02:41:41.510517', 'step': 2366, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:41:41.562820', 'step': 2366, 'epoch': 2} +{'type': 'loss', 'content': 0.005296733230352402, 'timestamp': '2025-09-10 02:41:41.565831', 'step': 2367, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:41:41.618470', 'step': 2367, 'epoch': 2} +{'type': 'loss', 'content': 0.0141748683527112, 'timestamp': '2025-09-10 02:41:41.624272', 'step': 2368, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:41:41.676324', 'step': 2368, 'epoch': 2} +{'type': 'loss', 'content': 0.03244634345173836, 'timestamp': '2025-09-10 02:41:41.678403', 'step': 2369, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:41:41.731091', 'step': 2369, 'epoch': 2} +{'type': 'loss', 'content': 0.0030230763368308544, 'timestamp': '2025-09-10 02:41:41.733172', 'step': 2370, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:41:41.790556', 'step': 2370, 'epoch': 2} +{'type': 'loss', 'content': 0.0023990808986127377, 'timestamp': '2025-09-10 02:41:41.801018', 'step': 2371, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:41:41.853887', 'step': 2371, 'epoch': 2} +{'type': 'loss', 'content': 0.0029439318459481, 'timestamp': '2025-09-10 02:41:41.859587', 'step': 2372, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:41:41.912190', 'step': 2372, 'epoch': 2} +{'type': 'loss', 'content': 0.001223023165948689, 'timestamp': '2025-09-10 02:41:41.914393', 'step': 2373, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:41:41.967738', 'step': 2373, 'epoch': 2} +{'type': 'loss', 'content': 0.022844061255455017, 'timestamp': '2025-09-10 02:41:41.977369', 'step': 2374, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:41:42.030311', 'step': 2374, 'epoch': 2} +{'type': 'loss', 'content': 0.007408964913338423, 'timestamp': '2025-09-10 02:41:42.032352', 'step': 2375, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:41:42.085178', 'step': 2375, 'epoch': 2} +{'type': 'loss', 'content': 0.005025565158575773, 'timestamp': '2025-09-10 02:41:42.092159', 'step': 2376, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:41:42.144032', 'step': 2376, 'epoch': 2} +{'type': 'loss', 'content': 0.003703473135828972, 'timestamp': '2025-09-10 02:41:42.146175', 'step': 2377, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:41:42.199586', 'step': 2377, 'epoch': 2} +{'type': 'loss', 'content': 0.023201072588562965, 'timestamp': '2025-09-10 02:41:42.205988', 'step': 2378, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:41:42.258629', 'step': 2378, 'epoch': 2} +{'type': 'loss', 'content': 0.0037703088019043207, 'timestamp': '2025-09-10 02:41:42.260669', 'step': 2379, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:41:42.313216', 'step': 2379, 'epoch': 2} +{'type': 'loss', 'content': 0.0029380829073488712, 'timestamp': '2025-09-10 02:41:42.319045', 'step': 2380, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:41:42.371988', 'step': 2380, 'epoch': 2} +{'type': 'loss', 'content': 0.005905658472329378, 'timestamp': '2025-09-10 02:41:42.373911', 'step': 2381, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:41:42.426887', 'step': 2381, 'epoch': 2} +{'type': 'loss', 'content': 0.012931009754538536, 'timestamp': '2025-09-10 02:41:42.428858', 'step': 2382, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:41:42.481751', 'step': 2382, 'epoch': 2} +{'type': 'loss', 'content': 0.0017085112631320953, 'timestamp': '2025-09-10 02:41:42.484046', 'step': 2383, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:41:42.538714', 'step': 2383, 'epoch': 2} +{'type': 'loss', 'content': 0.009159311652183533, 'timestamp': '2025-09-10 02:41:42.544295', 'step': 2384, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:41:42.596530', 'step': 2384, 'epoch': 2} +{'type': 'loss', 'content': 0.008803758770227432, 'timestamp': '2025-09-10 02:41:42.598603', 'step': 2385, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:41:42.652099', 'step': 2385, 'epoch': 2} +{'type': 'loss', 'content': 0.041297849267721176, 'timestamp': '2025-09-10 02:41:42.654294', 'step': 2386, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:41:42.714937', 'step': 2386, 'epoch': 2} +{'type': 'loss', 'content': 0.0009001196012832224, 'timestamp': '2025-09-10 02:41:42.725677', 'step': 2387, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:41:42.778458', 'step': 2387, 'epoch': 2} +{'type': 'loss', 'content': 0.0030867415480315685, 'timestamp': '2025-09-10 02:41:42.784303', 'step': 2388, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:41:42.836301', 'step': 2388, 'epoch': 2} +{'type': 'loss', 'content': 0.014532825909554958, 'timestamp': '2025-09-10 02:41:42.838335', 'step': 2389, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:41:42.891475', 'step': 2389, 'epoch': 2} +{'type': 'loss', 'content': 0.009635944850742817, 'timestamp': '2025-09-10 02:41:42.893800', 'step': 2390, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:41:42.947016', 'step': 2390, 'epoch': 2} +{'type': 'loss', 'content': 0.018802549690008163, 'timestamp': '2025-09-10 02:41:42.949831', 'step': 2391, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:41:43.002760', 'step': 2391, 'epoch': 2} +{'type': 'loss', 'content': 0.021239126101136208, 'timestamp': '2025-09-10 02:41:43.010007', 'step': 2392, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:41:43.061931', 'step': 2392, 'epoch': 2} +{'type': 'loss', 'content': 0.011052722111344337, 'timestamp': '2025-09-10 02:41:43.064111', 'step': 2393, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:41:43.116832', 'step': 2393, 'epoch': 2} +{'type': 'loss', 'content': 0.0025772773660719395, 'timestamp': '2025-09-10 02:41:43.118920', 'step': 2394, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:41:43.171460', 'step': 2394, 'epoch': 2} +{'type': 'loss', 'content': 0.00867217592895031, 'timestamp': '2025-09-10 02:41:43.173600', 'step': 2395, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:41:43.225982', 'step': 2395, 'epoch': 2} +{'type': 'loss', 'content': 0.03286437317728996, 'timestamp': '2025-09-10 02:41:43.231653', 'step': 2396, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:41:43.283865', 'step': 2396, 'epoch': 2} +{'type': 'loss', 'content': 0.016917405650019646, 'timestamp': '2025-09-10 02:41:43.285938', 'step': 2397, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:41:43.339107', 'step': 2397, 'epoch': 2} +{'type': 'loss', 'content': 0.03374059125781059, 'timestamp': '2025-09-10 02:41:43.341465', 'step': 2398, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:41:43.395050', 'step': 2398, 'epoch': 2} +{'type': 'loss', 'content': 0.008633555844426155, 'timestamp': '2025-09-10 02:41:43.397434', 'step': 2399, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:41:43.455902', 'step': 2399, 'epoch': 2} +{'type': 'loss', 'content': 0.007943732663989067, 'timestamp': '2025-09-10 02:41:43.467117', 'step': 2400, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:41:43.519208', 'step': 2400, 'epoch': 2} +{'type': 'loss', 'content': 0.0014182468876242638, 'timestamp': '2025-09-10 02:41:43.521300', 'step': 2401, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:41:43.574009', 'step': 2401, 'epoch': 2} +{'type': 'loss', 'content': 0.032707855105400085, 'timestamp': '2025-09-10 02:41:43.576269', 'step': 2402, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:41:43.628994', 'step': 2402, 'epoch': 2} +{'type': 'loss', 'content': 0.0147025715559721, 'timestamp': '2025-09-10 02:41:43.631213', 'step': 2403, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:41:43.684120', 'step': 2403, 'epoch': 2} +{'type': 'loss', 'content': 0.008943646214902401, 'timestamp': '2025-09-10 02:41:43.691273', 'step': 2404, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:41:43.743774', 'step': 2404, 'epoch': 2} +{'type': 'loss', 'content': 0.003356833476573229, 'timestamp': '2025-09-10 02:41:43.745829', 'step': 2405, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:41:43.798891', 'step': 2405, 'epoch': 2} +{'type': 'loss', 'content': 0.024517523124814034, 'timestamp': '2025-09-10 02:41:43.806763', 'step': 2406, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:41:43.860082', 'step': 2406, 'epoch': 2} +{'type': 'loss', 'content': 0.030542414635419846, 'timestamp': '2025-09-10 02:41:43.868421', 'step': 2407, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:41:43.921062', 'step': 2407, 'epoch': 2} +{'type': 'loss', 'content': 0.010237812995910645, 'timestamp': '2025-09-10 02:41:43.926768', 'step': 2408, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 928], 'flops': 18560112737920.0}, 'timestamp': '2025-09-10 02:41:44.055465', 'step': 2408, 'epoch': 2} +{'type': 'loss', 'content': 0.017811806872487068, 'timestamp': '2025-09-10 02:41:44.083804', 'step': 2409, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:41:44.137439', 'step': 2409, 'epoch': 2} +{'type': 'loss', 'content': 0.00509228091686964, 'timestamp': '2025-09-10 02:41:44.139492', 'step': 2410, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:41:44.193111', 'step': 2410, 'epoch': 2} +{'type': 'loss', 'content': 0.017014186829328537, 'timestamp': '2025-09-10 02:41:44.199142', 'step': 2411, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:41:44.252782', 'step': 2411, 'epoch': 2} +{'type': 'loss', 'content': 0.01280141156166792, 'timestamp': '2025-09-10 02:41:44.259274', 'step': 2412, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-09-10 02:41:44.323682', 'step': 2412, 'epoch': 2} +{'type': 'loss', 'content': 0.003944852855056524, 'timestamp': '2025-09-10 02:41:44.336973', 'step': 2413, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:41:44.390190', 'step': 2413, 'epoch': 2} +{'type': 'loss', 'content': 0.00950128398835659, 'timestamp': '2025-09-10 02:41:44.392595', 'step': 2414, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:41:44.445675', 'step': 2414, 'epoch': 2} +{'type': 'loss', 'content': 0.02840043418109417, 'timestamp': '2025-09-10 02:41:44.447783', 'step': 2415, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:41:44.500886', 'step': 2415, 'epoch': 2} +{'type': 'loss', 'content': 0.006371225696057081, 'timestamp': '2025-09-10 02:41:44.507878', 'step': 2416, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:41:44.560264', 'step': 2416, 'epoch': 2} +{'type': 'loss', 'content': 0.010210484266281128, 'timestamp': '2025-09-10 02:41:44.563063', 'step': 2417, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:41:44.616721', 'step': 2417, 'epoch': 2} +{'type': 'loss', 'content': 0.0269516222178936, 'timestamp': '2025-09-10 02:41:44.626378', 'step': 2418, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:41:44.679939', 'step': 2418, 'epoch': 2} +{'type': 'loss', 'content': 0.008304530754685402, 'timestamp': '2025-09-10 02:41:44.682262', 'step': 2419, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:41:44.734648', 'step': 2419, 'epoch': 2} +{'type': 'loss', 'content': 0.00686608674004674, 'timestamp': '2025-09-10 02:41:44.740573', 'step': 2420, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:41:44.793275', 'step': 2420, 'epoch': 2} +{'type': 'loss', 'content': 0.004633853677660227, 'timestamp': '2025-09-10 02:41:44.795885', 'step': 2421, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-09-10 02:41:44.864335', 'step': 2421, 'epoch': 2} +{'type': 'loss', 'content': 0.009348112158477306, 'timestamp': '2025-09-10 02:41:44.876974', 'step': 2422, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:41:44.930440', 'step': 2422, 'epoch': 2} +{'type': 'loss', 'content': 0.02112320438027382, 'timestamp': '2025-09-10 02:41:44.932456', 'step': 2423, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:41:44.985321', 'step': 2423, 'epoch': 2} +{'type': 'loss', 'content': 0.006549620069563389, 'timestamp': '2025-09-10 02:41:44.991048', 'step': 2424, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:41:45.043448', 'step': 2424, 'epoch': 2} +{'type': 'loss', 'content': 0.024886297062039375, 'timestamp': '2025-09-10 02:41:45.045826', 'step': 2425, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:41:45.099077', 'step': 2425, 'epoch': 2} +{'type': 'loss', 'content': 0.008090948686003685, 'timestamp': '2025-09-10 02:41:45.101636', 'step': 2426, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-09-10 02:41:45.169496', 'step': 2426, 'epoch': 2} +{'type': 'loss', 'content': 0.011296511627733707, 'timestamp': '2025-09-10 02:41:45.182073', 'step': 2427, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:41:45.236339', 'step': 2427, 'epoch': 2} +{'type': 'loss', 'content': 0.005852424539625645, 'timestamp': '2025-09-10 02:41:45.245477', 'step': 2428, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:41:45.300019', 'step': 2428, 'epoch': 2} +{'type': 'loss', 'content': 0.016052333638072014, 'timestamp': '2025-09-10 02:41:45.310485', 'step': 2429, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:41:45.365420', 'step': 2429, 'epoch': 2} +{'type': 'loss', 'content': 0.015054935589432716, 'timestamp': '2025-09-10 02:41:45.368227', 'step': 2430, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:41:45.423205', 'step': 2430, 'epoch': 2} +{'type': 'loss', 'content': 0.004304246511310339, 'timestamp': '2025-09-10 02:41:45.426311', 'step': 2431, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:41:45.479858', 'step': 2431, 'epoch': 2} +{'type': 'loss', 'content': 0.027863934636116028, 'timestamp': '2025-09-10 02:41:45.488749', 'step': 2432, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-09-10 02:41:45.557116', 'step': 2432, 'epoch': 2} +{'type': 'loss', 'content': 0.005833240691572428, 'timestamp': '2025-09-10 02:41:45.570344', 'step': 2433, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:41:45.624336', 'step': 2433, 'epoch': 2} +{'type': 'loss', 'content': 0.0322459414601326, 'timestamp': '2025-09-10 02:41:45.626998', 'step': 2434, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:41:45.681501', 'step': 2434, 'epoch': 2} +{'type': 'loss', 'content': 0.01197302620857954, 'timestamp': '2025-09-10 02:41:45.685217', 'step': 2435, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:41:45.749203', 'step': 2435, 'epoch': 2} +{'type': 'loss', 'content': 0.010854208841919899, 'timestamp': '2025-09-10 02:41:45.760399', 'step': 2436, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-09-10 02:41:45.827042', 'step': 2436, 'epoch': 2} +{'type': 'loss', 'content': 0.009456430561840534, 'timestamp': '2025-09-10 02:41:45.840292', 'step': 2437, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:41:45.896779', 'step': 2437, 'epoch': 2} +{'type': 'loss', 'content': 0.012631854973733425, 'timestamp': '2025-09-10 02:41:45.899205', 'step': 2438, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:41:45.954025', 'step': 2438, 'epoch': 2} +{'type': 'loss', 'content': 0.017019402235746384, 'timestamp': '2025-09-10 02:41:45.963695', 'step': 2439, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:41:46.017971', 'step': 2439, 'epoch': 2} +{'type': 'loss', 'content': 0.02528921328485012, 'timestamp': '2025-09-10 02:41:46.025113', 'step': 2440, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:41:46.084778', 'step': 2440, 'epoch': 2} +{'type': 'loss', 'content': 0.014773269183933735, 'timestamp': '2025-09-10 02:41:46.087295', 'step': 2441, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:41:46.140870', 'step': 2441, 'epoch': 2} +{'type': 'loss', 'content': 0.019137965515255928, 'timestamp': '2025-09-10 02:41:46.144041', 'step': 2442, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:41:46.205957', 'step': 2442, 'epoch': 2} +{'type': 'loss', 'content': 0.027888478711247444, 'timestamp': '2025-09-10 02:41:46.216748', 'step': 2443, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:41:46.270034', 'step': 2443, 'epoch': 2} +{'type': 'loss', 'content': 0.008485652506351471, 'timestamp': '2025-09-10 02:41:46.276132', 'step': 2444, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:41:46.330371', 'step': 2444, 'epoch': 2} +{'type': 'loss', 'content': 0.008633838966488838, 'timestamp': '2025-09-10 02:41:46.332915', 'step': 2445, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 496], 'flops': 9920060287936.0}, 'timestamp': '2025-09-10 02:41:46.408445', 'step': 2445, 'epoch': 2} +{'type': 'loss', 'content': 0.006705735344439745, 'timestamp': '2025-09-10 02:41:46.422401', 'step': 2446, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:41:46.476260', 'step': 2446, 'epoch': 2} +{'type': 'loss', 'content': 0.025132521986961365, 'timestamp': '2025-09-10 02:41:46.478611', 'step': 2447, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:41:46.532493', 'step': 2447, 'epoch': 2} +{'type': 'loss', 'content': 0.007952259853482246, 'timestamp': '2025-09-10 02:41:46.538553', 'step': 2448, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:41:46.595229', 'step': 2448, 'epoch': 2} +{'type': 'loss', 'content': 0.004040360916405916, 'timestamp': '2025-09-10 02:41:46.606443', 'step': 2449, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:41:46.660624', 'step': 2449, 'epoch': 2} +{'type': 'loss', 'content': 0.030639899894595146, 'timestamp': '2025-09-10 02:41:46.662944', 'step': 2450, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:41:46.715888', 'step': 2450, 'epoch': 2} +{'type': 'loss', 'content': 0.0033033587969839573, 'timestamp': '2025-09-10 02:41:46.718118', 'step': 2451, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 480], 'flops': 9600058345344.0}, 'timestamp': '2025-09-10 02:41:46.792000', 'step': 2451, 'epoch': 2} +{'type': 'loss', 'content': 0.015835467725992203, 'timestamp': '2025-09-10 02:41:46.806528', 'step': 2452, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:41:46.859315', 'step': 2452, 'epoch': 2} +{'type': 'loss', 'content': 0.012064282782375813, 'timestamp': '2025-09-10 02:41:46.861602', 'step': 2453, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:41:46.914780', 'step': 2453, 'epoch': 2} +{'type': 'loss', 'content': 0.005674016196280718, 'timestamp': '2025-09-10 02:41:46.917810', 'step': 2454, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:41:46.970676', 'step': 2454, 'epoch': 2} +{'type': 'loss', 'content': 0.005164622329175472, 'timestamp': '2025-09-10 02:41:46.972847', 'step': 2455, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:41:47.025784', 'step': 2455, 'epoch': 2} +{'type': 'loss', 'content': 0.008402018807828426, 'timestamp': '2025-09-10 02:41:47.031651', 'step': 2456, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:41:47.084786', 'step': 2456, 'epoch': 2} +{'type': 'loss', 'content': 0.016407065093517303, 'timestamp': '2025-09-10 02:41:47.086842', 'step': 2457, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:41:47.139713', 'step': 2457, 'epoch': 2} +{'type': 'loss', 'content': 0.01931409165263176, 'timestamp': '2025-09-10 02:41:47.141811', 'step': 2458, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:41:47.194991', 'step': 2458, 'epoch': 2} +{'type': 'loss', 'content': 0.01934720017015934, 'timestamp': '2025-09-10 02:41:47.197490', 'step': 2459, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:41:47.250612', 'step': 2459, 'epoch': 2} +{'type': 'loss', 'content': 0.023362424224615097, 'timestamp': '2025-09-10 02:41:47.256818', 'step': 2460, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:41:47.309839', 'step': 2460, 'epoch': 2} +{'type': 'loss', 'content': 0.004369188565760851, 'timestamp': '2025-09-10 02:41:47.319862', 'step': 2461, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:41:47.373554', 'step': 2461, 'epoch': 2} +{'type': 'loss', 'content': 0.0018872682703658938, 'timestamp': '2025-09-10 02:41:47.381835', 'step': 2462, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:41:47.435820', 'step': 2462, 'epoch': 2} +{'type': 'loss', 'content': 0.004272147547453642, 'timestamp': '2025-09-10 02:41:47.437829', 'step': 2463, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:41:47.491315', 'step': 2463, 'epoch': 2} +{'type': 'loss', 'content': 0.022475240752100945, 'timestamp': '2025-09-10 02:41:47.497437', 'step': 2464, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:41:47.549730', 'step': 2464, 'epoch': 2} +{'type': 'loss', 'content': 0.02900022082030773, 'timestamp': '2025-09-10 02:41:47.552718', 'step': 2465, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:41:47.605936', 'step': 2465, 'epoch': 2} +{'type': 'loss', 'content': 0.004683454986661673, 'timestamp': '2025-09-10 02:41:47.612094', 'step': 2466, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:41:47.665686', 'step': 2466, 'epoch': 2} +{'type': 'loss', 'content': 0.0017793446313589811, 'timestamp': '2025-09-10 02:41:47.668028', 'step': 2467, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:41:47.721954', 'step': 2467, 'epoch': 2} +{'type': 'loss', 'content': 0.003159657586365938, 'timestamp': '2025-09-10 02:41:47.732542', 'step': 2468, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:41:47.784826', 'step': 2468, 'epoch': 2} +{'type': 'loss', 'content': 0.002170691965147853, 'timestamp': '2025-09-10 02:41:47.787099', 'step': 2469, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:41:47.839888', 'step': 2469, 'epoch': 2} +{'type': 'loss', 'content': 0.007415976841002703, 'timestamp': '2025-09-10 02:41:47.842089', 'step': 2470, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:41:47.895689', 'step': 2470, 'epoch': 2} +{'type': 'loss', 'content': 0.011888401582837105, 'timestamp': '2025-09-10 02:41:47.897946', 'step': 2471, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:41:47.951118', 'step': 2471, 'epoch': 2} +{'type': 'loss', 'content': 0.013907036744058132, 'timestamp': '2025-09-10 02:41:47.957292', 'step': 2472, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:41:48.010767', 'step': 2472, 'epoch': 2} +{'type': 'loss', 'content': 0.0038232323713600636, 'timestamp': '2025-09-10 02:41:48.013091', 'step': 2473, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:41:48.071377', 'step': 2473, 'epoch': 2} +{'type': 'loss', 'content': 0.011546431109309196, 'timestamp': '2025-09-10 02:41:48.081835', 'step': 2474, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:41:48.136415', 'step': 2474, 'epoch': 2} +{'type': 'loss', 'content': 0.049716461449861526, 'timestamp': '2025-09-10 02:41:48.138794', 'step': 2475, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:41:48.193331', 'step': 2475, 'epoch': 2} +{'type': 'loss', 'content': 0.024496106430888176, 'timestamp': '2025-09-10 02:41:48.203932', 'step': 2476, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:41:48.260978', 'step': 2476, 'epoch': 2} +{'type': 'loss', 'content': 0.0039366804994642735, 'timestamp': '2025-09-10 02:41:48.272209', 'step': 2477, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:41:48.327203', 'step': 2477, 'epoch': 2} +{'type': 'loss', 'content': 0.01207022089511156, 'timestamp': '2025-09-10 02:41:48.336861', 'step': 2478, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:41:48.390695', 'step': 2478, 'epoch': 2} +{'type': 'loss', 'content': 0.004953169729560614, 'timestamp': '2025-09-10 02:41:48.396560', 'step': 2479, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:41:48.449961', 'step': 2479, 'epoch': 2} +{'type': 'loss', 'content': 0.016903908923268318, 'timestamp': '2025-09-10 02:41:48.456220', 'step': 2480, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:41:48.509904', 'step': 2480, 'epoch': 2} +{'type': 'loss', 'content': 0.01183212548494339, 'timestamp': '2025-09-10 02:41:48.517085', 'step': 2481, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:41:48.570966', 'step': 2481, 'epoch': 2} +{'type': 'loss', 'content': 0.014158886857330799, 'timestamp': '2025-09-10 02:41:48.573237', 'step': 2482, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:41:48.627000', 'step': 2482, 'epoch': 2} +{'type': 'loss', 'content': 0.01064236555248499, 'timestamp': '2025-09-10 02:41:48.633111', 'step': 2483, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-09-10 02:41:48.695532', 'step': 2483, 'epoch': 2} +{'type': 'loss', 'content': 0.020532239228487015, 'timestamp': '2025-09-10 02:41:48.707467', 'step': 2484, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:41:48.760291', 'step': 2484, 'epoch': 2} +{'type': 'loss', 'content': 0.007942325435578823, 'timestamp': '2025-09-10 02:41:48.762784', 'step': 2485, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:41:48.818027', 'step': 2485, 'epoch': 2} +{'type': 'loss', 'content': 0.003431000979617238, 'timestamp': '2025-09-10 02:41:48.827766', 'step': 2486, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:41:48.880865', 'step': 2486, 'epoch': 2} +{'type': 'loss', 'content': 0.01795695349574089, 'timestamp': '2025-09-10 02:41:48.883333', 'step': 2487, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:41:48.936778', 'step': 2487, 'epoch': 2} +{'type': 'loss', 'content': 0.010808142833411694, 'timestamp': '2025-09-10 02:41:48.943218', 'step': 2488, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:41:48.996068', 'step': 2488, 'epoch': 2} +{'type': 'loss', 'content': 0.0020689324010163546, 'timestamp': '2025-09-10 02:41:48.998663', 'step': 2489, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:41:49.052140', 'step': 2489, 'epoch': 2} +{'type': 'loss', 'content': 0.010920782573521137, 'timestamp': '2025-09-10 02:41:49.054559', 'step': 2490, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:41:49.108083', 'step': 2490, 'epoch': 2} +{'type': 'loss', 'content': 0.010443278588354588, 'timestamp': '2025-09-10 02:41:49.110707', 'step': 2491, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:41:49.165121', 'step': 2491, 'epoch': 2} +{'type': 'loss', 'content': 0.003924945369362831, 'timestamp': '2025-09-10 02:41:49.175541', 'step': 2492, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:41:49.227703', 'step': 2492, 'epoch': 2} +{'type': 'loss', 'content': 0.019283965229988098, 'timestamp': '2025-09-10 02:41:49.230014', 'step': 2493, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:41:49.283338', 'step': 2493, 'epoch': 2} +{'type': 'loss', 'content': 0.004657456185668707, 'timestamp': '2025-09-10 02:41:49.292953', 'step': 2494, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:41:49.346085', 'step': 2494, 'epoch': 2} +{'type': 'loss', 'content': 0.0023488460574299097, 'timestamp': '2025-09-10 02:41:49.348389', 'step': 2495, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:41:49.401378', 'step': 2495, 'epoch': 2} +{'type': 'loss', 'content': 0.0063967215828597546, 'timestamp': '2025-09-10 02:41:49.407362', 'step': 2496, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:41:49.459703', 'step': 2496, 'epoch': 2} +{'type': 'loss', 'content': 0.004745048936456442, 'timestamp': '2025-09-10 02:41:49.462699', 'step': 2497, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:41:49.523014', 'step': 2497, 'epoch': 2} +{'type': 'loss', 'content': 0.0107539938762784, 'timestamp': '2025-09-10 02:41:49.533782', 'step': 2498, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:41:49.587341', 'step': 2498, 'epoch': 2} +{'type': 'loss', 'content': 0.009626333601772785, 'timestamp': '2025-09-10 02:41:49.589696', 'step': 2499, 'epoch': 2} +{'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 736], 'batch_size': 8, 'flops': 14691612894976}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10859018244352}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 624], 'batch_size': 8, 'flops': 12455932682112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11497784019456}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8623338031488}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8623338031488}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10859018244352}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 656], 'batch_size': 8, 'flops': 13094698457216}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 560], 'batch_size': 8, 'flops': 11178401131904}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11497784019456}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 1168], 'batch_size': 8, 'flops': 23314950858880}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 640], 'batch_size': 8, 'flops': 12775315569664}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [6, 208], 'batch_size': 8, 'flops': 4151977605760}], 'timestamp': '2025-09-10 02:42:06.568005', 'step': 2499, 'epoch': 2} +{'type': 'pplx', 'content': 25981030.787906833, 'timestamp': '2025-09-10 02:42:06.571091', 'step': 2499, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:42:06.626271', 'step': 2499, 'epoch': 2} +{'type': 'loss', 'content': 0.008738330565392971, 'timestamp': '2025-09-10 02:42:06.632792', 'step': 2500, 'epoch': 2} +{'type': 'info', 'content': 'Checkpoint saved at step 2500', 'timestamp': '2025-09-10 02:42:07.029243', 'step': 2500, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:42:07.087528', 'step': 2500, 'epoch': 2} +{'type': 'loss', 'content': 0.0007609358872286975, 'timestamp': '2025-09-10 02:42:07.090183', 'step': 2501, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:42:07.145221', 'step': 2501, 'epoch': 2} +{'type': 'loss', 'content': 0.0012053739046677947, 'timestamp': '2025-09-10 02:42:07.150177', 'step': 2502, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:42:07.204007', 'step': 2502, 'epoch': 2} +{'type': 'loss', 'content': 0.017292501404881477, 'timestamp': '2025-09-10 02:42:07.206127', 'step': 2503, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:42:07.259753', 'step': 2503, 'epoch': 2} +{'type': 'loss', 'content': 0.0072624897584319115, 'timestamp': '2025-09-10 02:42:07.266741', 'step': 2504, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:42:07.319575', 'step': 2504, 'epoch': 2} +{'type': 'loss', 'content': 0.014371352270245552, 'timestamp': '2025-09-10 02:42:07.322468', 'step': 2505, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:42:07.376798', 'step': 2505, 'epoch': 2} +{'type': 'loss', 'content': 0.0016270782798528671, 'timestamp': '2025-09-10 02:42:07.379055', 'step': 2506, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:42:07.433164', 'step': 2506, 'epoch': 2} +{'type': 'loss', 'content': 0.005980811547487974, 'timestamp': '2025-09-10 02:42:07.435434', 'step': 2507, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-09-10 02:42:07.496230', 'step': 2507, 'epoch': 2} +{'type': 'loss', 'content': 0.02112610451877117, 'timestamp': '2025-09-10 02:42:07.507937', 'step': 2508, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:42:07.561007', 'step': 2508, 'epoch': 2} +{'type': 'loss', 'content': 0.008170275948941708, 'timestamp': '2025-09-10 02:42:07.566993', 'step': 2509, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:42:07.620095', 'step': 2509, 'epoch': 2} +{'type': 'loss', 'content': 0.03375102952122688, 'timestamp': '2025-09-10 02:42:07.622691', 'step': 2510, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:42:07.676206', 'step': 2510, 'epoch': 2} +{'type': 'loss', 'content': 0.024797100573778152, 'timestamp': '2025-09-10 02:42:07.678422', 'step': 2511, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:42:07.732886', 'step': 2511, 'epoch': 2} +{'type': 'loss', 'content': 0.0027288347482681274, 'timestamp': '2025-09-10 02:42:07.743469', 'step': 2512, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:42:07.796086', 'step': 2512, 'epoch': 2} +{'type': 'loss', 'content': 0.011557974852621555, 'timestamp': '2025-09-10 02:42:07.798565', 'step': 2513, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:42:07.851992', 'step': 2513, 'epoch': 2} +{'type': 'loss', 'content': 0.006120680831372738, 'timestamp': '2025-09-10 02:42:07.854720', 'step': 2514, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:42:07.908900', 'step': 2514, 'epoch': 2} +{'type': 'loss', 'content': 0.012789232656359673, 'timestamp': '2025-09-10 02:42:07.918502', 'step': 2515, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:42:07.973264', 'step': 2515, 'epoch': 2} +{'type': 'loss', 'content': 0.013009892776608467, 'timestamp': '2025-09-10 02:42:07.983798', 'step': 2516, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:42:08.036492', 'step': 2516, 'epoch': 2} +{'type': 'loss', 'content': 0.0021685867104679346, 'timestamp': '2025-09-10 02:42:08.042245', 'step': 2517, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:42:08.097017', 'step': 2517, 'epoch': 2} +{'type': 'loss', 'content': 0.0203882846981287, 'timestamp': '2025-09-10 02:42:08.099114', 'step': 2518, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:42:08.151885', 'step': 2518, 'epoch': 2} +{'type': 'loss', 'content': 0.002050741109997034, 'timestamp': '2025-09-10 02:42:08.154344', 'step': 2519, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:42:08.207087', 'step': 2519, 'epoch': 2} +{'type': 'loss', 'content': 0.02386748231947422, 'timestamp': '2025-09-10 02:42:08.213253', 'step': 2520, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:42:08.265795', 'step': 2520, 'epoch': 2} +{'type': 'loss', 'content': 0.0099802166223526, 'timestamp': '2025-09-10 02:42:08.268151', 'step': 2521, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:42:08.326579', 'step': 2521, 'epoch': 2} +{'type': 'loss', 'content': 0.0045924014411866665, 'timestamp': '2025-09-10 02:42:08.328849', 'step': 2522, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:42:08.384206', 'step': 2522, 'epoch': 2} +{'type': 'loss', 'content': 0.01153036393225193, 'timestamp': '2025-09-10 02:42:08.389379', 'step': 2523, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:42:08.455327', 'step': 2523, 'epoch': 2} +{'type': 'loss', 'content': 0.021937353536486626, 'timestamp': '2025-09-10 02:42:08.465687', 'step': 2524, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:42:08.524909', 'step': 2524, 'epoch': 2} +{'type': 'loss', 'content': 0.004579412750899792, 'timestamp': '2025-09-10 02:42:08.536495', 'step': 2525, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:42:08.596939', 'step': 2525, 'epoch': 2} +{'type': 'loss', 'content': 0.018722962588071823, 'timestamp': '2025-09-10 02:42:08.607382', 'step': 2526, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:42:08.661044', 'step': 2526, 'epoch': 2} +{'type': 'loss', 'content': 0.0006787048769183457, 'timestamp': '2025-09-10 02:42:08.663874', 'step': 2527, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:42:08.716954', 'step': 2527, 'epoch': 2} +{'type': 'loss', 'content': 0.008177361451089382, 'timestamp': '2025-09-10 02:42:08.724654', 'step': 2528, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:42:08.784235', 'step': 2528, 'epoch': 2} +{'type': 'loss', 'content': 0.0024081855081021786, 'timestamp': '2025-09-10 02:42:08.794699', 'step': 2529, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:42:08.856625', 'step': 2529, 'epoch': 2} +{'type': 'loss', 'content': 0.011337630450725555, 'timestamp': '2025-09-10 02:42:08.867399', 'step': 2530, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:42:08.920892', 'step': 2530, 'epoch': 2} +{'type': 'loss', 'content': 0.02496512606739998, 'timestamp': '2025-09-10 02:42:08.923103', 'step': 2531, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:42:08.976383', 'step': 2531, 'epoch': 2} +{'type': 'loss', 'content': 0.010318547487258911, 'timestamp': '2025-09-10 02:42:08.982674', 'step': 2532, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:42:09.042989', 'step': 2532, 'epoch': 2} +{'type': 'loss', 'content': 0.02730054035782814, 'timestamp': '2025-09-10 02:42:09.054326', 'step': 2533, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:42:09.115072', 'step': 2533, 'epoch': 2} +{'type': 'loss', 'content': 0.0053492337465286255, 'timestamp': '2025-09-10 02:42:09.117797', 'step': 2534, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 608], 'flops': 12160073886080.0}, 'timestamp': '2025-09-10 02:42:09.208932', 'step': 2534, 'epoch': 2} +{'type': 'loss', 'content': 0.012704300694167614, 'timestamp': '2025-09-10 02:42:09.226015', 'step': 2535, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:42:09.287271', 'step': 2535, 'epoch': 2} +{'type': 'loss', 'content': 0.004206423182040453, 'timestamp': '2025-09-10 02:42:09.297872', 'step': 2536, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:42:09.350842', 'step': 2536, 'epoch': 2} +{'type': 'loss', 'content': 0.001664001145400107, 'timestamp': '2025-09-10 02:42:09.353139', 'step': 2537, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:42:09.409530', 'step': 2537, 'epoch': 2} +{'type': 'loss', 'content': 0.025458360090851784, 'timestamp': '2025-09-10 02:42:09.411956', 'step': 2538, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:42:09.472858', 'step': 2538, 'epoch': 2} +{'type': 'loss', 'content': 0.030657034367322922, 'timestamp': '2025-09-10 02:42:09.483515', 'step': 2539, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-09-10 02:42:09.547785', 'step': 2539, 'epoch': 2} +{'type': 'loss', 'content': 0.008289070799946785, 'timestamp': '2025-09-10 02:42:09.559625', 'step': 2540, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:42:09.613178', 'step': 2540, 'epoch': 2} +{'type': 'loss', 'content': 0.0224428940564394, 'timestamp': '2025-09-10 02:42:09.615659', 'step': 2541, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:42:09.670340', 'step': 2541, 'epoch': 2} +{'type': 'loss', 'content': 0.030413294211030006, 'timestamp': '2025-09-10 02:42:09.672690', 'step': 2542, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:42:09.725701', 'step': 2542, 'epoch': 2} +{'type': 'loss', 'content': 0.02619311399757862, 'timestamp': '2025-09-10 02:42:09.728021', 'step': 2543, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:42:09.786549', 'step': 2543, 'epoch': 2} +{'type': 'loss', 'content': 0.00035007070982828736, 'timestamp': '2025-09-10 02:42:09.792692', 'step': 2544, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:42:09.846398', 'step': 2544, 'epoch': 2} +{'type': 'loss', 'content': 0.011450978927314281, 'timestamp': '2025-09-10 02:42:09.852888', 'step': 2545, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:42:09.905693', 'step': 2545, 'epoch': 2} +{'type': 'loss', 'content': 0.01046075951308012, 'timestamp': '2025-09-10 02:42:09.909774', 'step': 2546, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:42:09.973588', 'step': 2546, 'epoch': 2} +{'type': 'loss', 'content': 0.0021498361602425575, 'timestamp': '2025-09-10 02:42:09.976636', 'step': 2547, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:42:10.040432', 'step': 2547, 'epoch': 2} +{'type': 'loss', 'content': 0.002972465241327882, 'timestamp': '2025-09-10 02:42:10.046819', 'step': 2548, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-09-10 02:42:10.107251', 'step': 2548, 'epoch': 2} +{'type': 'loss', 'content': 0.012440596707165241, 'timestamp': '2025-09-10 02:42:10.119054', 'step': 2549, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:42:10.173417', 'step': 2549, 'epoch': 2} +{'type': 'loss', 'content': 0.005210519302636385, 'timestamp': '2025-09-10 02:42:10.176425', 'step': 2550, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:42:10.229540', 'step': 2550, 'epoch': 2} +{'type': 'loss', 'content': 0.003876955946907401, 'timestamp': '2025-09-10 02:42:10.234986', 'step': 2551, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:42:10.289517', 'step': 2551, 'epoch': 2} +{'type': 'loss', 'content': 0.014093932695686817, 'timestamp': '2025-09-10 02:42:10.295645', 'step': 2552, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:42:10.357491', 'step': 2552, 'epoch': 2} +{'type': 'loss', 'content': 0.04181818291544914, 'timestamp': '2025-09-10 02:42:10.361557', 'step': 2553, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:42:10.419717', 'step': 2553, 'epoch': 2} +{'type': 'loss', 'content': 0.014616904780268669, 'timestamp': '2025-09-10 02:42:10.425888', 'step': 2554, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:42:10.481786', 'step': 2554, 'epoch': 2} +{'type': 'loss', 'content': 0.007901540026068687, 'timestamp': '2025-09-10 02:42:10.484385', 'step': 2555, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:42:10.538021', 'step': 2555, 'epoch': 2} +{'type': 'loss', 'content': 0.030624257400631905, 'timestamp': '2025-09-10 02:42:10.547433', 'step': 2556, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:42:10.601937', 'step': 2556, 'epoch': 2} +{'type': 'loss', 'content': 0.0014362893998622894, 'timestamp': '2025-09-10 02:42:10.610077', 'step': 2557, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:42:10.678890', 'step': 2557, 'epoch': 2} +{'type': 'loss', 'content': 0.0006508413353003561, 'timestamp': '2025-09-10 02:42:10.681354', 'step': 2558, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:42:10.734703', 'step': 2558, 'epoch': 2} +{'type': 'loss', 'content': 0.011077557690441608, 'timestamp': '2025-09-10 02:42:10.737166', 'step': 2559, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:42:10.801708', 'step': 2559, 'epoch': 2} +{'type': 'loss', 'content': 0.004156519193202257, 'timestamp': '2025-09-10 02:42:10.808071', 'step': 2560, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:42:10.863207', 'step': 2560, 'epoch': 2} +{'type': 'loss', 'content': 0.007181708235293627, 'timestamp': '2025-09-10 02:42:10.869253', 'step': 2561, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:42:10.936305', 'step': 2561, 'epoch': 2} +{'type': 'loss', 'content': 0.0007056365138851106, 'timestamp': '2025-09-10 02:42:10.940350', 'step': 2562, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:42:10.997489', 'step': 2562, 'epoch': 2} +{'type': 'loss', 'content': 0.005064928438514471, 'timestamp': '2025-09-10 02:42:10.999970', 'step': 2563, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:42:11.052778', 'step': 2563, 'epoch': 2} +{'type': 'loss', 'content': 0.01664942502975464, 'timestamp': '2025-09-10 02:42:11.058984', 'step': 2564, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:42:11.117091', 'step': 2564, 'epoch': 2} +{'type': 'loss', 'content': 0.005057492293417454, 'timestamp': '2025-09-10 02:42:11.126077', 'step': 2565, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:42:11.183873', 'step': 2565, 'epoch': 2} +{'type': 'loss', 'content': 0.022189823910593987, 'timestamp': '2025-09-10 02:42:11.186537', 'step': 2566, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:42:11.243687', 'step': 2566, 'epoch': 2} +{'type': 'loss', 'content': 0.026736078783869743, 'timestamp': '2025-09-10 02:42:11.246891', 'step': 2567, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:42:11.304166', 'step': 2567, 'epoch': 2} +{'type': 'loss', 'content': 0.009926991537213326, 'timestamp': '2025-09-10 02:42:11.310299', 'step': 2568, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:42:11.372035', 'step': 2568, 'epoch': 2} +{'type': 'loss', 'content': 0.00032137572998180985, 'timestamp': '2025-09-10 02:42:11.383674', 'step': 2569, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:42:11.440946', 'step': 2569, 'epoch': 2} +{'type': 'loss', 'content': 0.0036710728891193867, 'timestamp': '2025-09-10 02:42:11.451192', 'step': 2570, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:42:11.512540', 'step': 2570, 'epoch': 2} +{'type': 'loss', 'content': 0.001445622299797833, 'timestamp': '2025-09-10 02:42:11.515330', 'step': 2571, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:42:11.570002', 'step': 2571, 'epoch': 2} +{'type': 'loss', 'content': 0.015834486111998558, 'timestamp': '2025-09-10 02:42:11.576731', 'step': 2572, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:42:11.637773', 'step': 2572, 'epoch': 2} +{'type': 'loss', 'content': 0.001168629853054881, 'timestamp': '2025-09-10 02:42:11.657947', 'step': 2573, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:42:11.714785', 'step': 2573, 'epoch': 2} +{'type': 'loss', 'content': 0.004391381051391363, 'timestamp': '2025-09-10 02:42:11.719912', 'step': 2574, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:42:11.781613', 'step': 2574, 'epoch': 2} +{'type': 'loss', 'content': 0.013341886922717094, 'timestamp': '2025-09-10 02:42:11.792033', 'step': 2575, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:42:11.846213', 'step': 2575, 'epoch': 2} +{'type': 'loss', 'content': 0.005331250838935375, 'timestamp': '2025-09-10 02:42:11.857062', 'step': 2576, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:42:11.914076', 'step': 2576, 'epoch': 2} +{'type': 'loss', 'content': 0.015829240903258324, 'timestamp': '2025-09-10 02:42:11.924564', 'step': 2577, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:42:11.988526', 'step': 2577, 'epoch': 2} +{'type': 'loss', 'content': 0.0023078806698322296, 'timestamp': '2025-09-10 02:42:11.999227', 'step': 2578, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:42:12.058106', 'step': 2578, 'epoch': 2} +{'type': 'loss', 'content': 0.01056588999927044, 'timestamp': '2025-09-10 02:42:12.063308', 'step': 2579, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:42:12.123520', 'step': 2579, 'epoch': 2} +{'type': 'loss', 'content': 0.007611948996782303, 'timestamp': '2025-09-10 02:42:12.131708', 'step': 2580, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:42:12.185445', 'step': 2580, 'epoch': 2} +{'type': 'loss', 'content': 0.0280488021671772, 'timestamp': '2025-09-10 02:42:12.187941', 'step': 2581, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 528], 'flops': 10560064173120.0}, 'timestamp': '2025-09-10 02:42:12.276173', 'step': 2581, 'epoch': 2} +{'type': 'loss', 'content': 0.011155783198773861, 'timestamp': '2025-09-10 02:42:12.291209', 'step': 2582, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:42:12.354422', 'step': 2582, 'epoch': 2} +{'type': 'loss', 'content': 0.02470453456044197, 'timestamp': '2025-09-10 02:42:12.372665', 'step': 2583, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:42:12.442356', 'step': 2583, 'epoch': 2} +{'type': 'loss', 'content': 0.014006450772285461, 'timestamp': '2025-09-10 02:42:12.451990', 'step': 2584, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:42:12.508523', 'step': 2584, 'epoch': 2} +{'type': 'loss', 'content': 0.009627717547118664, 'timestamp': '2025-09-10 02:42:12.512268', 'step': 2585, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:42:12.579979', 'step': 2585, 'epoch': 2} +{'type': 'loss', 'content': 0.020211519673466682, 'timestamp': '2025-09-10 02:42:12.590422', 'step': 2586, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:42:12.645995', 'step': 2586, 'epoch': 2} +{'type': 'loss', 'content': 0.022182533517479897, 'timestamp': '2025-09-10 02:42:12.655792', 'step': 2587, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-09-10 02:42:12.724924', 'step': 2587, 'epoch': 2} +{'type': 'loss', 'content': 0.0041777160950005054, 'timestamp': '2025-09-10 02:42:12.737917', 'step': 2588, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:42:12.794653', 'step': 2588, 'epoch': 2} +{'type': 'loss', 'content': 0.015036248601973057, 'timestamp': '2025-09-10 02:42:12.799925', 'step': 2589, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:42:12.863312', 'step': 2589, 'epoch': 2} +{'type': 'loss', 'content': 0.01882903091609478, 'timestamp': '2025-09-10 02:42:12.872651', 'step': 2590, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:42:12.927709', 'step': 2590, 'epoch': 2} +{'type': 'loss', 'content': 0.0014924319693818688, 'timestamp': '2025-09-10 02:42:12.936897', 'step': 2591, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:42:12.995248', 'step': 2591, 'epoch': 2} +{'type': 'loss', 'content': 0.01842663064599037, 'timestamp': '2025-09-10 02:42:13.001482', 'step': 2592, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:42:13.055468', 'step': 2592, 'epoch': 2} +{'type': 'loss', 'content': 0.0039041806012392044, 'timestamp': '2025-09-10 02:42:13.057678', 'step': 2593, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:42:13.112199', 'step': 2593, 'epoch': 2} +{'type': 'loss', 'content': 0.02539738453924656, 'timestamp': '2025-09-10 02:42:13.115025', 'step': 2594, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:42:13.173590', 'step': 2594, 'epoch': 2} +{'type': 'loss', 'content': 0.0013453267747536302, 'timestamp': '2025-09-10 02:42:13.175773', 'step': 2595, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:42:13.228884', 'step': 2595, 'epoch': 2} +{'type': 'loss', 'content': 0.0006421868456527591, 'timestamp': '2025-09-10 02:42:13.234862', 'step': 2596, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-09-10 02:42:13.300119', 'step': 2596, 'epoch': 2} +{'type': 'loss', 'content': 0.009963750839233398, 'timestamp': '2025-09-10 02:42:13.313350', 'step': 2597, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:42:13.366972', 'step': 2597, 'epoch': 2} +{'type': 'loss', 'content': 0.004111562855541706, 'timestamp': '2025-09-10 02:42:13.369820', 'step': 2598, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:42:13.429167', 'step': 2598, 'epoch': 2} +{'type': 'loss', 'content': 0.011045041494071484, 'timestamp': '2025-09-10 02:42:13.431065', 'step': 2599, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:42:13.490950', 'step': 2599, 'epoch': 2} +{'type': 'loss', 'content': 0.011161820963025093, 'timestamp': '2025-09-10 02:42:13.499903', 'step': 2600, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:42:13.553457', 'step': 2600, 'epoch': 2} +{'type': 'loss', 'content': 0.025803251191973686, 'timestamp': '2025-09-10 02:42:13.555982', 'step': 2601, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:42:13.610149', 'step': 2601, 'epoch': 2} +{'type': 'loss', 'content': 0.0559576153755188, 'timestamp': '2025-09-10 02:42:13.612385', 'step': 2602, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:42:13.667786', 'step': 2602, 'epoch': 2} +{'type': 'loss', 'content': 0.006970132235437632, 'timestamp': '2025-09-10 02:42:13.670042', 'step': 2603, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:42:13.725022', 'step': 2603, 'epoch': 2} +{'type': 'loss', 'content': 0.007758519612252712, 'timestamp': '2025-09-10 02:42:13.731688', 'step': 2604, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-09-10 02:42:13.800640', 'step': 2604, 'epoch': 2} +{'type': 'loss', 'content': 0.011060030199587345, 'timestamp': '2025-09-10 02:42:13.814266', 'step': 2605, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:42:13.867599', 'step': 2605, 'epoch': 2} +{'type': 'loss', 'content': 0.034862495958805084, 'timestamp': '2025-09-10 02:42:13.870103', 'step': 2606, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:42:13.928272', 'step': 2606, 'epoch': 2} +{'type': 'loss', 'content': 0.0031833648681640625, 'timestamp': '2025-09-10 02:42:13.938736', 'step': 2607, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:42:13.994791', 'step': 2607, 'epoch': 2} +{'type': 'loss', 'content': 0.0019451836124062538, 'timestamp': '2025-09-10 02:42:14.006616', 'step': 2608, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:42:14.065944', 'step': 2608, 'epoch': 2} +{'type': 'loss', 'content': 0.0014401193475350738, 'timestamp': '2025-09-10 02:42:14.076502', 'step': 2609, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:42:14.131836', 'step': 2609, 'epoch': 2} +{'type': 'loss', 'content': 0.0009075532434508204, 'timestamp': '2025-09-10 02:42:14.134143', 'step': 2610, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:42:14.189787', 'step': 2610, 'epoch': 2} +{'type': 'loss', 'content': 0.01933993212878704, 'timestamp': '2025-09-10 02:42:14.193346', 'step': 2611, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:42:14.247275', 'step': 2611, 'epoch': 2} +{'type': 'loss', 'content': 0.005016653332859278, 'timestamp': '2025-09-10 02:42:14.255144', 'step': 2612, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:42:14.315946', 'step': 2612, 'epoch': 2} +{'type': 'loss', 'content': 0.005122971720993519, 'timestamp': '2025-09-10 02:42:14.318496', 'step': 2613, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:42:14.375474', 'step': 2613, 'epoch': 2} +{'type': 'loss', 'content': 0.013108565472066402, 'timestamp': '2025-09-10 02:42:14.381339', 'step': 2614, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:42:14.436304', 'step': 2614, 'epoch': 2} +{'type': 'loss', 'content': 0.009488807059824467, 'timestamp': '2025-09-10 02:42:14.443848', 'step': 2615, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:42:14.501883', 'step': 2615, 'epoch': 2} +{'type': 'loss', 'content': 0.001307025202549994, 'timestamp': '2025-09-10 02:42:14.508215', 'step': 2616, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:42:14.566460', 'step': 2616, 'epoch': 2} +{'type': 'loss', 'content': 0.008845310658216476, 'timestamp': '2025-09-10 02:42:14.573955', 'step': 2617, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:42:14.633049', 'step': 2617, 'epoch': 2} +{'type': 'loss', 'content': 0.013522865250706673, 'timestamp': '2025-09-10 02:42:14.643474', 'step': 2618, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:42:14.702218', 'step': 2618, 'epoch': 2} +{'type': 'loss', 'content': 0.048304710537195206, 'timestamp': '2025-09-10 02:42:14.704420', 'step': 2619, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:42:14.758952', 'step': 2619, 'epoch': 2} +{'type': 'loss', 'content': 0.02768402360379696, 'timestamp': '2025-09-10 02:42:14.764864', 'step': 2620, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:42:14.818335', 'step': 2620, 'epoch': 2} +{'type': 'loss', 'content': 0.01532816793769598, 'timestamp': '2025-09-10 02:42:14.826127', 'step': 2621, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:42:14.881340', 'step': 2621, 'epoch': 2} +{'type': 'loss', 'content': 0.010250316001474857, 'timestamp': '2025-09-10 02:42:14.884104', 'step': 2622, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:42:14.944719', 'step': 2622, 'epoch': 2} +{'type': 'loss', 'content': 0.020182248204946518, 'timestamp': '2025-09-10 02:42:14.955117', 'step': 2623, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:42:15.009701', 'step': 2623, 'epoch': 2} +{'type': 'loss', 'content': 0.02402585744857788, 'timestamp': '2025-09-10 02:42:15.015572', 'step': 2624, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:42:15.068303', 'step': 2624, 'epoch': 2} +{'type': 'loss', 'content': 0.004449274856597185, 'timestamp': '2025-09-10 02:42:15.070215', 'step': 2625, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:42:15.123229', 'step': 2625, 'epoch': 2} +{'type': 'loss', 'content': 0.011047663167119026, 'timestamp': '2025-09-10 02:42:15.126661', 'step': 2626, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:42:15.187472', 'step': 2626, 'epoch': 2} +{'type': 'loss', 'content': 0.0017207256751134992, 'timestamp': '2025-09-10 02:42:15.197297', 'step': 2627, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:42:15.250485', 'step': 2627, 'epoch': 2} +{'type': 'loss', 'content': 0.0061110020615160465, 'timestamp': '2025-09-10 02:42:15.257519', 'step': 2628, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:42:15.310959', 'step': 2628, 'epoch': 2} +{'type': 'loss', 'content': 0.01842239871621132, 'timestamp': '2025-09-10 02:42:15.313981', 'step': 2629, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:42:15.366946', 'step': 2629, 'epoch': 2} +{'type': 'loss', 'content': 0.008514808490872383, 'timestamp': '2025-09-10 02:42:15.372055', 'step': 2630, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:42:15.427967', 'step': 2630, 'epoch': 2} +{'type': 'loss', 'content': 0.007261297665536404, 'timestamp': '2025-09-10 02:42:15.429986', 'step': 2631, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:42:15.483107', 'step': 2631, 'epoch': 2} +{'type': 'loss', 'content': 0.0009131209808401763, 'timestamp': '2025-09-10 02:42:15.490035', 'step': 2632, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:42:15.545992', 'step': 2632, 'epoch': 2} +{'type': 'loss', 'content': 0.014827689155936241, 'timestamp': '2025-09-10 02:42:15.548415', 'step': 2633, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:42:15.609305', 'step': 2633, 'epoch': 2} +{'type': 'loss', 'content': 0.019348548725247383, 'timestamp': '2025-09-10 02:42:15.620054', 'step': 2634, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:42:15.674050', 'step': 2634, 'epoch': 2} +{'type': 'loss', 'content': 0.006343338638544083, 'timestamp': '2025-09-10 02:42:15.677389', 'step': 2635, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:42:15.730967', 'step': 2635, 'epoch': 2} +{'type': 'loss', 'content': 0.004739842377603054, 'timestamp': '2025-09-10 02:42:15.736942', 'step': 2636, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:42:15.789883', 'step': 2636, 'epoch': 2} +{'type': 'loss', 'content': 0.0178393367677927, 'timestamp': '2025-09-10 02:42:15.792766', 'step': 2637, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:42:15.847380', 'step': 2637, 'epoch': 2} +{'type': 'loss', 'content': 0.02419670857489109, 'timestamp': '2025-09-10 02:42:15.857004', 'step': 2638, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:42:15.911951', 'step': 2638, 'epoch': 2} +{'type': 'loss', 'content': 0.009919347241520882, 'timestamp': '2025-09-10 02:42:15.915094', 'step': 2639, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:42:15.971119', 'step': 2639, 'epoch': 2} +{'type': 'loss', 'content': 0.02844708040356636, 'timestamp': '2025-09-10 02:42:15.980829', 'step': 2640, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:42:16.035899', 'step': 2640, 'epoch': 2} +{'type': 'loss', 'content': 0.01855628378689289, 'timestamp': '2025-09-10 02:42:16.038624', 'step': 2641, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:42:16.091865', 'step': 2641, 'epoch': 2} +{'type': 'loss', 'content': 0.015538026578724384, 'timestamp': '2025-09-10 02:42:16.094395', 'step': 2642, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:42:16.147594', 'step': 2642, 'epoch': 2} +{'type': 'loss', 'content': 0.01040005125105381, 'timestamp': '2025-09-10 02:42:16.150585', 'step': 2643, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:42:16.204385', 'step': 2643, 'epoch': 2} +{'type': 'loss', 'content': 0.01307311374694109, 'timestamp': '2025-09-10 02:42:16.210564', 'step': 2644, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:42:16.264696', 'step': 2644, 'epoch': 2} +{'type': 'loss', 'content': 0.003692588536068797, 'timestamp': '2025-09-10 02:42:16.266689', 'step': 2645, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:42:16.320118', 'step': 2645, 'epoch': 2} +{'type': 'loss', 'content': 0.010986563749611378, 'timestamp': '2025-09-10 02:42:16.326751', 'step': 2646, 'epoch': 2} +{'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 736], 'batch_size': 8, 'flops': 14691612894976}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10859018244352}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 624], 'batch_size': 8, 'flops': 12455932682112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11497784019456}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8623338031488}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8623338031488}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10859018244352}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 656], 'batch_size': 8, 'flops': 13094698457216}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 560], 'batch_size': 8, 'flops': 11178401131904}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11497784019456}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 1168], 'batch_size': 8, 'flops': 23314950858880}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 640], 'batch_size': 8, 'flops': 12775315569664}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [6, 208], 'batch_size': 8, 'flops': 4151977605760}], 'timestamp': '2025-09-10 02:42:33.251418', 'step': 2646, 'epoch': 2} +{'type': 'pplx', 'content': 21589005.77936028, 'timestamp': '2025-09-10 02:42:33.255355', 'step': 2646, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:42:33.317311', 'step': 2646, 'epoch': 2} +{'type': 'loss', 'content': 0.002613171236589551, 'timestamp': '2025-09-10 02:42:33.321516', 'step': 2647, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:42:33.396238', 'step': 2647, 'epoch': 2} +{'type': 'loss', 'content': 0.004713746253401041, 'timestamp': '2025-09-10 02:42:33.405833', 'step': 2648, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:42:33.471657', 'step': 2648, 'epoch': 2} +{'type': 'loss', 'content': 0.004867583978921175, 'timestamp': '2025-09-10 02:42:33.479520', 'step': 2649, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:42:33.539800', 'step': 2649, 'epoch': 2} +{'type': 'loss', 'content': 0.01249657291918993, 'timestamp': '2025-09-10 02:42:33.545094', 'step': 2650, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:42:33.616243', 'step': 2650, 'epoch': 2} +{'type': 'loss', 'content': 0.006686859764158726, 'timestamp': '2025-09-10 02:42:33.618738', 'step': 2651, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:42:33.690537', 'step': 2651, 'epoch': 2} +{'type': 'loss', 'content': 0.002885418012738228, 'timestamp': '2025-09-10 02:42:33.702052', 'step': 2652, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-09-10 02:42:33.769061', 'step': 2652, 'epoch': 2} +{'type': 'loss', 'content': 0.002413894282653928, 'timestamp': '2025-09-10 02:42:33.781093', 'step': 2653, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:42:33.838014', 'step': 2653, 'epoch': 2} +{'type': 'loss', 'content': 0.009109397418797016, 'timestamp': '2025-09-10 02:42:33.840226', 'step': 2654, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:42:33.893817', 'step': 2654, 'epoch': 2} +{'type': 'loss', 'content': 0.006080333609133959, 'timestamp': '2025-09-10 02:42:33.896266', 'step': 2655, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-09-10 02:42:33.963193', 'step': 2655, 'epoch': 2} +{'type': 'loss', 'content': 0.0030370343010872602, 'timestamp': '2025-09-10 02:42:33.976212', 'step': 2656, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:42:34.029410', 'step': 2656, 'epoch': 2} +{'type': 'loss', 'content': 0.020097937434911728, 'timestamp': '2025-09-10 02:42:34.031403', 'step': 2657, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:42:34.084467', 'step': 2657, 'epoch': 2} +{'type': 'loss', 'content': 0.011799024417996407, 'timestamp': '2025-09-10 02:42:34.086800', 'step': 2658, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:42:34.139900', 'step': 2658, 'epoch': 2} +{'type': 'loss', 'content': 0.056280989199876785, 'timestamp': '2025-09-10 02:42:34.146195', 'step': 2659, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:42:34.199236', 'step': 2659, 'epoch': 2} +{'type': 'loss', 'content': 0.009348825551569462, 'timestamp': '2025-09-10 02:42:34.206494', 'step': 2660, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:42:34.259166', 'step': 2660, 'epoch': 2} +{'type': 'loss', 'content': 0.003881684970110655, 'timestamp': '2025-09-10 02:42:34.261249', 'step': 2661, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:42:34.314263', 'step': 2661, 'epoch': 2} +{'type': 'loss', 'content': 0.010265841148793697, 'timestamp': '2025-09-10 02:42:34.320976', 'step': 2662, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:42:34.374334', 'step': 2662, 'epoch': 2} +{'type': 'loss', 'content': 0.018050475046038628, 'timestamp': '2025-09-10 02:42:34.382425', 'step': 2663, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:42:34.437345', 'step': 2663, 'epoch': 2} +{'type': 'loss', 'content': 0.006232109852135181, 'timestamp': '2025-09-10 02:42:34.447928', 'step': 2664, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:42:34.500431', 'step': 2664, 'epoch': 2} +{'type': 'loss', 'content': 0.008110949769616127, 'timestamp': '2025-09-10 02:42:34.503387', 'step': 2665, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:42:34.556326', 'step': 2665, 'epoch': 2} +{'type': 'loss', 'content': 0.00312112458050251, 'timestamp': '2025-09-10 02:42:34.558528', 'step': 2666, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:42:34.611331', 'step': 2666, 'epoch': 2} +{'type': 'loss', 'content': 0.009352847002446651, 'timestamp': '2025-09-10 02:42:34.613826', 'step': 2667, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:42:34.666788', 'step': 2667, 'epoch': 2} +{'type': 'loss', 'content': 0.004420316778123379, 'timestamp': '2025-09-10 02:42:34.672342', 'step': 2668, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-09-10 02:42:34.732446', 'step': 2668, 'epoch': 2} +{'type': 'loss', 'content': 0.0024199034087359905, 'timestamp': '2025-09-10 02:42:34.744501', 'step': 2669, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:42:34.804893', 'step': 2669, 'epoch': 2} +{'type': 'loss', 'content': 0.010698945261538029, 'timestamp': '2025-09-10 02:42:34.815579', 'step': 2670, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:42:34.870561', 'step': 2670, 'epoch': 2} +{'type': 'loss', 'content': 0.01290913950651884, 'timestamp': '2025-09-10 02:42:34.880198', 'step': 2671, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:42:34.933156', 'step': 2671, 'epoch': 2} +{'type': 'loss', 'content': 0.004819564521312714, 'timestamp': '2025-09-10 02:42:34.938921', 'step': 2672, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:42:34.991213', 'step': 2672, 'epoch': 2} +{'type': 'loss', 'content': 0.025426404550671577, 'timestamp': '2025-09-10 02:42:34.993282', 'step': 2673, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:42:35.046705', 'step': 2673, 'epoch': 2} +{'type': 'loss', 'content': 0.0014216218842193484, 'timestamp': '2025-09-10 02:42:35.048943', 'step': 2674, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:42:35.102060', 'step': 2674, 'epoch': 2} +{'type': 'loss', 'content': 0.002475936198607087, 'timestamp': '2025-09-10 02:42:35.110267', 'step': 2675, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:42:35.166947', 'step': 2675, 'epoch': 2} +{'type': 'loss', 'content': 0.004885523580014706, 'timestamp': '2025-09-10 02:42:35.172495', 'step': 2676, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:42:35.224765', 'step': 2676, 'epoch': 2} +{'type': 'loss', 'content': 0.0006973663694225252, 'timestamp': '2025-09-10 02:42:35.231445', 'step': 2677, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-09-10 02:42:35.297944', 'step': 2677, 'epoch': 2} +{'type': 'loss', 'content': 0.007346330676227808, 'timestamp': '2025-09-10 02:42:35.310174', 'step': 2678, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:42:35.363024', 'step': 2678, 'epoch': 2} +{'type': 'loss', 'content': 0.001958871725946665, 'timestamp': '2025-09-10 02:42:35.366060', 'step': 2679, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:42:35.420796', 'step': 2679, 'epoch': 2} +{'type': 'loss', 'content': 0.05266200378537178, 'timestamp': '2025-09-10 02:42:35.431379', 'step': 2680, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:42:35.484412', 'step': 2680, 'epoch': 2} +{'type': 'loss', 'content': 0.008676085621118546, 'timestamp': '2025-09-10 02:42:35.486672', 'step': 2681, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:42:35.539661', 'step': 2681, 'epoch': 2} +{'type': 'loss', 'content': 0.03698348626494408, 'timestamp': '2025-09-10 02:42:35.542778', 'step': 2682, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:42:35.595535', 'step': 2682, 'epoch': 2} +{'type': 'loss', 'content': 0.0012144326465204358, 'timestamp': '2025-09-10 02:42:35.597663', 'step': 2683, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:42:35.650352', 'step': 2683, 'epoch': 2} +{'type': 'loss', 'content': 0.023621609434485435, 'timestamp': '2025-09-10 02:42:35.656289', 'step': 2684, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:42:35.709448', 'step': 2684, 'epoch': 2} +{'type': 'loss', 'content': 0.0025063061621040106, 'timestamp': '2025-09-10 02:42:35.715306', 'step': 2685, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:42:35.768595', 'step': 2685, 'epoch': 2} +{'type': 'loss', 'content': 0.00428465660661459, 'timestamp': '2025-09-10 02:42:35.770786', 'step': 2686, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:42:35.823766', 'step': 2686, 'epoch': 2} +{'type': 'loss', 'content': 0.007750555872917175, 'timestamp': '2025-09-10 02:42:35.825883', 'step': 2687, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:42:35.878688', 'step': 2687, 'epoch': 2} +{'type': 'loss', 'content': 0.0037932603154331446, 'timestamp': '2025-09-10 02:42:35.884619', 'step': 2688, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:42:35.937112', 'step': 2688, 'epoch': 2} +{'type': 'loss', 'content': 0.010463619604706764, 'timestamp': '2025-09-10 02:42:35.943649', 'step': 2689, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:42:35.996983', 'step': 2689, 'epoch': 2} +{'type': 'loss', 'content': 0.05804077908396721, 'timestamp': '2025-09-10 02:42:36.000003', 'step': 2690, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:42:36.053248', 'step': 2690, 'epoch': 2} +{'type': 'loss', 'content': 0.007580592297017574, 'timestamp': '2025-09-10 02:42:36.055539', 'step': 2691, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:42:36.108885', 'step': 2691, 'epoch': 2} +{'type': 'loss', 'content': 0.016909649595618248, 'timestamp': '2025-09-10 02:42:36.114773', 'step': 2692, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:42:36.166611', 'step': 2692, 'epoch': 2} +{'type': 'loss', 'content': 0.001259945216588676, 'timestamp': '2025-09-10 02:42:36.168707', 'step': 2693, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:42:36.221381', 'step': 2693, 'epoch': 2} +{'type': 'loss', 'content': 0.01103545818477869, 'timestamp': '2025-09-10 02:42:36.223497', 'step': 2694, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:42:36.276598', 'step': 2694, 'epoch': 2} +{'type': 'loss', 'content': 0.001709669129922986, 'timestamp': '2025-09-10 02:42:36.278655', 'step': 2695, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:42:36.331402', 'step': 2695, 'epoch': 2} +{'type': 'loss', 'content': 0.010664992965757847, 'timestamp': '2025-09-10 02:42:36.337416', 'step': 2696, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:42:36.390392', 'step': 2696, 'epoch': 2} +{'type': 'loss', 'content': 0.004748235456645489, 'timestamp': '2025-09-10 02:42:36.392555', 'step': 2697, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:42:36.445956', 'step': 2697, 'epoch': 2} +{'type': 'loss', 'content': 0.009418095462024212, 'timestamp': '2025-09-10 02:42:36.452519', 'step': 2698, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:42:36.505660', 'step': 2698, 'epoch': 2} +{'type': 'loss', 'content': 0.0035032329615205526, 'timestamp': '2025-09-10 02:42:36.508011', 'step': 2699, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:42:36.561291', 'step': 2699, 'epoch': 2} +{'type': 'loss', 'content': 0.011930056847631931, 'timestamp': '2025-09-10 02:42:36.567198', 'step': 2700, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:42:36.620002', 'step': 2700, 'epoch': 2} +{'type': 'loss', 'content': 0.012250921688973904, 'timestamp': '2025-09-10 02:42:36.622180', 'step': 2701, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:42:36.676573', 'step': 2701, 'epoch': 2} +{'type': 'loss', 'content': 0.009608532302081585, 'timestamp': '2025-09-10 02:42:36.686131', 'step': 2702, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:42:36.739573', 'step': 2702, 'epoch': 2} +{'type': 'loss', 'content': 0.023671496659517288, 'timestamp': '2025-09-10 02:42:36.741793', 'step': 2703, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:42:36.794709', 'step': 2703, 'epoch': 2} +{'type': 'loss', 'content': 0.0012722143437713385, 'timestamp': '2025-09-10 02:42:36.803624', 'step': 2704, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:42:36.856913', 'step': 2704, 'epoch': 2} +{'type': 'loss', 'content': 0.010743381455540657, 'timestamp': '2025-09-10 02:42:36.859052', 'step': 2705, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:42:36.911906', 'step': 2705, 'epoch': 2} +{'type': 'loss', 'content': 0.019930407404899597, 'timestamp': '2025-09-10 02:42:36.915550', 'step': 2706, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:42:36.970775', 'step': 2706, 'epoch': 2} +{'type': 'loss', 'content': 0.002769148675724864, 'timestamp': '2025-09-10 02:42:36.972941', 'step': 2707, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:42:37.025892', 'step': 2707, 'epoch': 2} +{'type': 'loss', 'content': 0.009284740313887596, 'timestamp': '2025-09-10 02:42:37.034839', 'step': 2708, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:42:37.087313', 'step': 2708, 'epoch': 2} +{'type': 'loss', 'content': 0.0016837477451190352, 'timestamp': '2025-09-10 02:42:37.089393', 'step': 2709, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:42:37.141829', 'step': 2709, 'epoch': 2} +{'type': 'loss', 'content': 0.0018571644322946668, 'timestamp': '2025-09-10 02:42:37.144144', 'step': 2710, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:42:37.196739', 'step': 2710, 'epoch': 2} +{'type': 'loss', 'content': 0.022031908854842186, 'timestamp': '2025-09-10 02:42:37.199771', 'step': 2711, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:42:37.253667', 'step': 2711, 'epoch': 2} +{'type': 'loss', 'content': 0.0071655805222690105, 'timestamp': '2025-09-10 02:42:37.264081', 'step': 2712, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:42:37.320944', 'step': 2712, 'epoch': 2} +{'type': 'loss', 'content': 0.0024215078447014093, 'timestamp': '2025-09-10 02:42:37.332179', 'step': 2713, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:42:37.387476', 'step': 2713, 'epoch': 2} +{'type': 'loss', 'content': 0.002322185318917036, 'timestamp': '2025-09-10 02:42:37.395706', 'step': 2714, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 656], 'flops': 13120079713856.0}, 'timestamp': '2025-09-10 02:42:37.492031', 'step': 2714, 'epoch': 2} +{'type': 'loss', 'content': 0.01126450952142477, 'timestamp': '2025-09-10 02:42:37.510566', 'step': 2715, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:42:37.564284', 'step': 2715, 'epoch': 2} +{'type': 'loss', 'content': 0.001423872308805585, 'timestamp': '2025-09-10 02:42:37.570146', 'step': 2716, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:42:37.622656', 'step': 2716, 'epoch': 2} +{'type': 'loss', 'content': 0.0017503226408734918, 'timestamp': '2025-09-10 02:42:37.624563', 'step': 2717, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:42:37.677528', 'step': 2717, 'epoch': 2} +{'type': 'loss', 'content': 0.0006705854902975261, 'timestamp': '2025-09-10 02:42:37.680576', 'step': 2718, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:42:37.734212', 'step': 2718, 'epoch': 2} +{'type': 'loss', 'content': 0.03989630937576294, 'timestamp': '2025-09-10 02:42:37.736145', 'step': 2719, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:42:37.789056', 'step': 2719, 'epoch': 2} +{'type': 'loss', 'content': 0.011698446236550808, 'timestamp': '2025-09-10 02:42:37.794597', 'step': 2720, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:42:37.848021', 'step': 2720, 'epoch': 2} +{'type': 'loss', 'content': 0.0012054798426106572, 'timestamp': '2025-09-10 02:42:37.858529', 'step': 2721, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:42:37.911662', 'step': 2721, 'epoch': 2} +{'type': 'loss', 'content': 0.0011886332649737597, 'timestamp': '2025-09-10 02:42:37.914119', 'step': 2722, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:42:37.967450', 'step': 2722, 'epoch': 2} +{'type': 'loss', 'content': 0.003914439585059881, 'timestamp': '2025-09-10 02:42:37.970396', 'step': 2723, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:42:38.024044', 'step': 2723, 'epoch': 2} +{'type': 'loss', 'content': 0.0028424093034118414, 'timestamp': '2025-09-10 02:42:38.029542', 'step': 2724, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:42:38.082991', 'step': 2724, 'epoch': 2} +{'type': 'loss', 'content': 0.03507957234978676, 'timestamp': '2025-09-10 02:42:38.085239', 'step': 2725, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:42:38.139797', 'step': 2725, 'epoch': 2} +{'type': 'loss', 'content': 0.0009752371115610003, 'timestamp': '2025-09-10 02:42:38.141936', 'step': 2726, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:42:38.196679', 'step': 2726, 'epoch': 2} +{'type': 'loss', 'content': 0.0025734584778547287, 'timestamp': '2025-09-10 02:42:38.199191', 'step': 2727, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:42:38.252712', 'step': 2727, 'epoch': 2} +{'type': 'loss', 'content': 0.001260034623555839, 'timestamp': '2025-09-10 02:42:38.258701', 'step': 2728, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:42:38.311909', 'step': 2728, 'epoch': 2} +{'type': 'loss', 'content': 0.0016585452249273658, 'timestamp': '2025-09-10 02:42:38.319833', 'step': 2729, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:42:38.372468', 'step': 2729, 'epoch': 2} +{'type': 'loss', 'content': 0.0003432965313550085, 'timestamp': '2025-09-10 02:42:38.374871', 'step': 2730, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:42:38.428192', 'step': 2730, 'epoch': 2} +{'type': 'loss', 'content': 0.05336865410208702, 'timestamp': '2025-09-10 02:42:38.430322', 'step': 2731, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:42:38.485131', 'step': 2731, 'epoch': 2} +{'type': 'loss', 'content': 0.0027787829749286175, 'timestamp': '2025-09-10 02:42:38.491096', 'step': 2732, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:42:38.545041', 'step': 2732, 'epoch': 2} +{'type': 'loss', 'content': 0.017570756375789642, 'timestamp': '2025-09-10 02:42:38.555526', 'step': 2733, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 512], 'flops': 10240062230528.0}, 'timestamp': '2025-09-10 02:42:38.630530', 'step': 2733, 'epoch': 2} +{'type': 'loss', 'content': 0.010207007639110088, 'timestamp': '2025-09-10 02:42:38.644572', 'step': 2734, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:42:38.697695', 'step': 2734, 'epoch': 2} +{'type': 'loss', 'content': 0.027272040024399757, 'timestamp': '2025-09-10 02:42:38.699974', 'step': 2735, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:42:38.752805', 'step': 2735, 'epoch': 2} +{'type': 'loss', 'content': 0.002277930034324527, 'timestamp': '2025-09-10 02:42:38.758716', 'step': 2736, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:42:38.811435', 'step': 2736, 'epoch': 2} +{'type': 'loss', 'content': 0.005421612877398729, 'timestamp': '2025-09-10 02:42:38.813791', 'step': 2737, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:42:38.868472', 'step': 2737, 'epoch': 2} +{'type': 'loss', 'content': 0.0007791418465785682, 'timestamp': '2025-09-10 02:42:38.870795', 'step': 2738, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:42:38.924295', 'step': 2738, 'epoch': 2} +{'type': 'loss', 'content': 0.0012010777136310935, 'timestamp': '2025-09-10 02:42:38.926487', 'step': 2739, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:42:38.980083', 'step': 2739, 'epoch': 2} +{'type': 'loss', 'content': 0.03191911056637764, 'timestamp': '2025-09-10 02:42:38.986061', 'step': 2740, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:42:39.039049', 'step': 2740, 'epoch': 2} +{'type': 'loss', 'content': 0.0018431125208735466, 'timestamp': '2025-09-10 02:42:39.045412', 'step': 2741, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:42:39.103936', 'step': 2741, 'epoch': 2} +{'type': 'loss', 'content': 0.011000008322298527, 'timestamp': '2025-09-10 02:42:39.114390', 'step': 2742, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:42:39.167744', 'step': 2742, 'epoch': 2} +{'type': 'loss', 'content': 0.016186822205781937, 'timestamp': '2025-09-10 02:42:39.170221', 'step': 2743, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:42:39.223322', 'step': 2743, 'epoch': 2} +{'type': 'loss', 'content': 0.0029471402522176504, 'timestamp': '2025-09-10 02:42:39.229264', 'step': 2744, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:42:39.289150', 'step': 2744, 'epoch': 2} +{'type': 'loss', 'content': 0.0023999048862606287, 'timestamp': '2025-09-10 02:42:39.300740', 'step': 2745, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:42:39.353939', 'step': 2745, 'epoch': 2} +{'type': 'loss', 'content': 0.0022870872635394335, 'timestamp': '2025-09-10 02:42:39.356223', 'step': 2746, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:42:39.409578', 'step': 2746, 'epoch': 2} +{'type': 'loss', 'content': 0.015325046144425869, 'timestamp': '2025-09-10 02:42:39.411779', 'step': 2747, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:42:39.465311', 'step': 2747, 'epoch': 2} +{'type': 'loss', 'content': 0.005069035571068525, 'timestamp': '2025-09-10 02:42:39.474322', 'step': 2748, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:42:39.526720', 'step': 2748, 'epoch': 2} +{'type': 'loss', 'content': 0.03320688381791115, 'timestamp': '2025-09-10 02:42:39.529539', 'step': 2749, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:42:39.585444', 'step': 2749, 'epoch': 2} +{'type': 'loss', 'content': 0.04804794862866402, 'timestamp': '2025-09-10 02:42:39.590258', 'step': 2750, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:42:39.644796', 'step': 2750, 'epoch': 2} +{'type': 'loss', 'content': 0.008649543859064579, 'timestamp': '2025-09-10 02:42:39.649829', 'step': 2751, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:42:39.704553', 'step': 2751, 'epoch': 2} +{'type': 'loss', 'content': 0.0009506465867161751, 'timestamp': '2025-09-10 02:42:39.711124', 'step': 2752, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:42:39.765100', 'step': 2752, 'epoch': 2} +{'type': 'loss', 'content': 0.0006533870473504066, 'timestamp': '2025-09-10 02:42:39.775425', 'step': 2753, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:42:39.829679', 'step': 2753, 'epoch': 2} +{'type': 'loss', 'content': 0.00020588845654856414, 'timestamp': '2025-09-10 02:42:39.831895', 'step': 2754, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:42:39.886362', 'step': 2754, 'epoch': 2} +{'type': 'loss', 'content': 0.0023811091668903828, 'timestamp': '2025-09-10 02:42:39.888657', 'step': 2755, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:42:39.943347', 'step': 2755, 'epoch': 2} +{'type': 'loss', 'content': 0.011465086601674557, 'timestamp': '2025-09-10 02:42:39.949635', 'step': 2756, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:42:40.003558', 'step': 2756, 'epoch': 2} +{'type': 'loss', 'content': 0.006404041778296232, 'timestamp': '2025-09-10 02:42:40.005994', 'step': 2757, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:42:40.059263', 'step': 2757, 'epoch': 2} +{'type': 'loss', 'content': 0.008716910146176815, 'timestamp': '2025-09-10 02:42:40.061377', 'step': 2758, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:42:40.115747', 'step': 2758, 'epoch': 2} +{'type': 'loss', 'content': 0.008281629532575607, 'timestamp': '2025-09-10 02:42:40.121771', 'step': 2759, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:42:40.176569', 'step': 2759, 'epoch': 2} +{'type': 'loss', 'content': 0.001604673219844699, 'timestamp': '2025-09-10 02:42:40.187189', 'step': 2760, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:42:40.239493', 'step': 2760, 'epoch': 2} +{'type': 'loss', 'content': 0.0006515085697174072, 'timestamp': '2025-09-10 02:42:40.241659', 'step': 2761, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:42:40.294362', 'step': 2761, 'epoch': 2} +{'type': 'loss', 'content': 0.008057648316025734, 'timestamp': '2025-09-10 02:42:40.296538', 'step': 2762, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:42:40.349941', 'step': 2762, 'epoch': 2} +{'type': 'loss', 'content': 0.005328737664967775, 'timestamp': '2025-09-10 02:42:40.352159', 'step': 2763, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:42:40.405354', 'step': 2763, 'epoch': 2} +{'type': 'loss', 'content': 0.0034779810812324286, 'timestamp': '2025-09-10 02:42:40.411107', 'step': 2764, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:42:40.463651', 'step': 2764, 'epoch': 2} +{'type': 'loss', 'content': 0.0028056337032467127, 'timestamp': '2025-09-10 02:42:40.473935', 'step': 2765, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:42:40.528692', 'step': 2765, 'epoch': 2} +{'type': 'loss', 'content': 0.00816755834966898, 'timestamp': '2025-09-10 02:42:40.538491', 'step': 2766, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:42:40.591380', 'step': 2766, 'epoch': 2} +{'type': 'loss', 'content': 0.0049702781252563, 'timestamp': '2025-09-10 02:42:40.593516', 'step': 2767, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:42:40.646966', 'step': 2767, 'epoch': 2} +{'type': 'loss', 'content': 0.041935887187719345, 'timestamp': '2025-09-10 02:42:40.652587', 'step': 2768, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:42:40.704843', 'step': 2768, 'epoch': 2} +{'type': 'loss', 'content': 0.0027195082511752844, 'timestamp': '2025-09-10 02:42:40.707043', 'step': 2769, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:42:40.760335', 'step': 2769, 'epoch': 2} +{'type': 'loss', 'content': 0.028412478044629097, 'timestamp': '2025-09-10 02:42:40.762543', 'step': 2770, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:42:40.815461', 'step': 2770, 'epoch': 2} +{'type': 'loss', 'content': 0.00922696478664875, 'timestamp': '2025-09-10 02:42:40.817892', 'step': 2771, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:42:40.871019', 'step': 2771, 'epoch': 2} +{'type': 'loss', 'content': 0.022764507681131363, 'timestamp': '2025-09-10 02:42:40.876910', 'step': 2772, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:42:40.929034', 'step': 2772, 'epoch': 2} +{'type': 'loss', 'content': 0.006302524823695421, 'timestamp': '2025-09-10 02:42:40.935838', 'step': 2773, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:42:40.988522', 'step': 2773, 'epoch': 2} +{'type': 'loss', 'content': 0.01538578700274229, 'timestamp': '2025-09-10 02:42:40.995275', 'step': 2774, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:42:41.049281', 'step': 2774, 'epoch': 2} +{'type': 'loss', 'content': 0.008590908721089363, 'timestamp': '2025-09-10 02:42:41.051591', 'step': 2775, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:42:41.105108', 'step': 2775, 'epoch': 2} +{'type': 'loss', 'content': 0.027642270550131798, 'timestamp': '2025-09-10 02:42:41.112701', 'step': 2776, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:42:41.165405', 'step': 2776, 'epoch': 2} +{'type': 'loss', 'content': 0.028574557974934578, 'timestamp': '2025-09-10 02:42:41.172240', 'step': 2777, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:42:41.225251', 'step': 2777, 'epoch': 2} +{'type': 'loss', 'content': 0.030429674312472343, 'timestamp': '2025-09-10 02:42:41.233462', 'step': 2778, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:42:41.286597', 'step': 2778, 'epoch': 2} +{'type': 'loss', 'content': 0.01037047989666462, 'timestamp': '2025-09-10 02:42:41.294779', 'step': 2779, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:42:41.348226', 'step': 2779, 'epoch': 2} +{'type': 'loss', 'content': 0.00216799252666533, 'timestamp': '2025-09-10 02:42:41.355519', 'step': 2780, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:42:41.408134', 'step': 2780, 'epoch': 2} +{'type': 'loss', 'content': 0.01218800712376833, 'timestamp': '2025-09-10 02:42:41.410863', 'step': 2781, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:42:41.463785', 'step': 2781, 'epoch': 2} +{'type': 'loss', 'content': 0.006440795958042145, 'timestamp': '2025-09-10 02:42:41.466147', 'step': 2782, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:42:41.518848', 'step': 2782, 'epoch': 2} +{'type': 'loss', 'content': 0.0038501867093145847, 'timestamp': '2025-09-10 02:42:41.521100', 'step': 2783, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:42:41.574028', 'step': 2783, 'epoch': 2} +{'type': 'loss', 'content': 0.0091011431068182, 'timestamp': '2025-09-10 02:42:41.580889', 'step': 2784, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:42:41.638278', 'step': 2784, 'epoch': 2} +{'type': 'loss', 'content': 0.009202203713357449, 'timestamp': '2025-09-10 02:42:41.645142', 'step': 2785, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:42:41.702926', 'step': 2785, 'epoch': 2} +{'type': 'loss', 'content': 0.0011785599635913968, 'timestamp': '2025-09-10 02:42:41.711291', 'step': 2786, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:42:41.764352', 'step': 2786, 'epoch': 2} +{'type': 'loss', 'content': 0.001529795117676258, 'timestamp': '2025-09-10 02:42:41.770960', 'step': 2787, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:42:41.826832', 'step': 2787, 'epoch': 2} +{'type': 'loss', 'content': 0.0076119364239275455, 'timestamp': '2025-09-10 02:42:41.837184', 'step': 2788, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:42:41.897032', 'step': 2788, 'epoch': 2} +{'type': 'loss', 'content': 0.010635657235980034, 'timestamp': '2025-09-10 02:42:41.899066', 'step': 2789, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:42:41.952188', 'step': 2789, 'epoch': 2} +{'type': 'loss', 'content': 0.03798241168260574, 'timestamp': '2025-09-10 02:42:41.954297', 'step': 2790, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:42:42.012230', 'step': 2790, 'epoch': 2} +{'type': 'loss', 'content': 0.027064664289355278, 'timestamp': '2025-09-10 02:42:42.014487', 'step': 2791, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:42:42.067891', 'step': 2791, 'epoch': 2} +{'type': 'loss', 'content': 0.005027863197028637, 'timestamp': '2025-09-10 02:42:42.073778', 'step': 2792, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:42:42.136063', 'step': 2792, 'epoch': 2} +{'type': 'loss', 'content': 0.008662876673042774, 'timestamp': '2025-09-10 02:42:42.138373', 'step': 2793, 'epoch': 2} +{'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 736], 'batch_size': 8, 'flops': 14691612894976}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10859018244352}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 624], 'batch_size': 8, 'flops': 12455932682112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11497784019456}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8623338031488}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8623338031488}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10859018244352}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 656], 'batch_size': 8, 'flops': 13094698457216}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 560], 'batch_size': 8, 'flops': 11178401131904}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11497784019456}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 1168], 'batch_size': 8, 'flops': 23314950858880}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 640], 'batch_size': 8, 'flops': 12775315569664}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [6, 208], 'batch_size': 8, 'flops': 4151977605760}], 'timestamp': '2025-09-10 02:42:59.342017', 'step': 2793, 'epoch': 2} +{'type': 'pplx', 'content': 19602792.91243142, 'timestamp': '2025-09-10 02:42:59.345053', 'step': 2793, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:42:59.400535', 'step': 2793, 'epoch': 2} +{'type': 'loss', 'content': 0.0018834838410839438, 'timestamp': '2025-09-10 02:42:59.407707', 'step': 2794, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:42:59.467168', 'step': 2794, 'epoch': 2} +{'type': 'loss', 'content': 0.05520009994506836, 'timestamp': '2025-09-10 02:42:59.477603', 'step': 2795, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:42:59.531430', 'step': 2795, 'epoch': 2} +{'type': 'loss', 'content': 0.007993536069989204, 'timestamp': '2025-09-10 02:42:59.537969', 'step': 2796, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:42:59.591473', 'step': 2796, 'epoch': 2} +{'type': 'loss', 'content': 0.0039032420609146357, 'timestamp': '2025-09-10 02:42:59.594265', 'step': 2797, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:42:59.647649', 'step': 2797, 'epoch': 2} +{'type': 'loss', 'content': 0.0015108529478311539, 'timestamp': '2025-09-10 02:42:59.649799', 'step': 2798, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:42:59.703077', 'step': 2798, 'epoch': 2} +{'type': 'loss', 'content': 0.0070057399570941925, 'timestamp': '2025-09-10 02:42:59.705393', 'step': 2799, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:42:59.760627', 'step': 2799, 'epoch': 2} +{'type': 'loss', 'content': 0.003342447802424431, 'timestamp': '2025-09-10 02:42:59.766720', 'step': 2800, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:42:59.820615', 'step': 2800, 'epoch': 2} +{'type': 'loss', 'content': 0.02086058259010315, 'timestamp': '2025-09-10 02:42:59.823304', 'step': 2801, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-09-10 02:42:59.886675', 'step': 2801, 'epoch': 2} +{'type': 'loss', 'content': 0.03028605319559574, 'timestamp': '2025-09-10 02:42:59.897796', 'step': 2802, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:42:59.950926', 'step': 2802, 'epoch': 2} +{'type': 'loss', 'content': 0.018823647871613503, 'timestamp': '2025-09-10 02:42:59.952971', 'step': 2803, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:43:00.006706', 'step': 2803, 'epoch': 2} +{'type': 'loss', 'content': 0.008998965844511986, 'timestamp': '2025-09-10 02:43:00.012786', 'step': 2804, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:43:00.066314', 'step': 2804, 'epoch': 2} +{'type': 'loss', 'content': 0.0027237108442932367, 'timestamp': '2025-09-10 02:43:00.076813', 'step': 2805, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:43:00.130386', 'step': 2805, 'epoch': 2} +{'type': 'loss', 'content': 0.01370945107191801, 'timestamp': '2025-09-10 02:43:00.138318', 'step': 2806, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:43:00.192275', 'step': 2806, 'epoch': 2} +{'type': 'loss', 'content': 0.024837393313646317, 'timestamp': '2025-09-10 02:43:00.194319', 'step': 2807, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:43:00.247402', 'step': 2807, 'epoch': 2} +{'type': 'loss', 'content': 0.018284594640135765, 'timestamp': '2025-09-10 02:43:00.253438', 'step': 2808, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:43:00.305915', 'step': 2808, 'epoch': 2} +{'type': 'loss', 'content': 0.006133030168712139, 'timestamp': '2025-09-10 02:43:00.308178', 'step': 2809, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:43:00.361104', 'step': 2809, 'epoch': 2} +{'type': 'loss', 'content': 0.009379004128277302, 'timestamp': '2025-09-10 02:43:00.363271', 'step': 2810, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:43:00.418289', 'step': 2810, 'epoch': 2} +{'type': 'loss', 'content': 0.00395641615614295, 'timestamp': '2025-09-10 02:43:00.428126', 'step': 2811, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:43:00.481887', 'step': 2811, 'epoch': 2} +{'type': 'loss', 'content': 0.022432571277022362, 'timestamp': '2025-09-10 02:43:00.489433', 'step': 2812, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:43:00.543822', 'step': 2812, 'epoch': 2} +{'type': 'loss', 'content': 0.0022870022803545, 'timestamp': '2025-09-10 02:43:00.546439', 'step': 2813, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:43:00.600026', 'step': 2813, 'epoch': 2} +{'type': 'loss', 'content': 0.0036130433436483145, 'timestamp': '2025-09-10 02:43:00.602956', 'step': 2814, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:43:00.656359', 'step': 2814, 'epoch': 2} +{'type': 'loss', 'content': 0.0028867374639958143, 'timestamp': '2025-09-10 02:43:00.658538', 'step': 2815, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:43:00.711526', 'step': 2815, 'epoch': 2} +{'type': 'loss', 'content': 0.003171600867062807, 'timestamp': '2025-09-10 02:43:00.717833', 'step': 2816, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:43:00.770845', 'step': 2816, 'epoch': 2} +{'type': 'loss', 'content': 0.01314304955303669, 'timestamp': '2025-09-10 02:43:00.773251', 'step': 2817, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:43:00.826399', 'step': 2817, 'epoch': 2} +{'type': 'loss', 'content': 0.011470197699964046, 'timestamp': '2025-09-10 02:43:00.828940', 'step': 2818, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:43:00.882086', 'step': 2818, 'epoch': 2} +{'type': 'loss', 'content': 0.009405607357621193, 'timestamp': '2025-09-10 02:43:00.885681', 'step': 2819, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:43:00.946119', 'step': 2819, 'epoch': 2} +{'type': 'loss', 'content': 0.006938908249139786, 'timestamp': '2025-09-10 02:43:00.957331', 'step': 2820, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:43:01.013266', 'step': 2820, 'epoch': 2} +{'type': 'loss', 'content': 0.03679962828755379, 'timestamp': '2025-09-10 02:43:01.023258', 'step': 2821, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:43:01.076607', 'step': 2821, 'epoch': 2} +{'type': 'loss', 'content': 0.01181592233479023, 'timestamp': '2025-09-10 02:43:01.078852', 'step': 2822, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:43:01.132584', 'step': 2822, 'epoch': 2} +{'type': 'loss', 'content': 0.005727309733629227, 'timestamp': '2025-09-10 02:43:01.134665', 'step': 2823, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:43:01.190903', 'step': 2823, 'epoch': 2} +{'type': 'loss', 'content': 0.011616931296885014, 'timestamp': '2025-09-10 02:43:01.196676', 'step': 2824, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:43:01.254901', 'step': 2824, 'epoch': 2} +{'type': 'loss', 'content': 0.011883147992193699, 'timestamp': '2025-09-10 02:43:01.263117', 'step': 2825, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:43:01.316817', 'step': 2825, 'epoch': 2} +{'type': 'loss', 'content': 0.008945704437792301, 'timestamp': '2025-09-10 02:43:01.323132', 'step': 2826, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:43:01.379303', 'step': 2826, 'epoch': 2} +{'type': 'loss', 'content': 0.015747204422950745, 'timestamp': '2025-09-10 02:43:01.381679', 'step': 2827, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:43:01.438407', 'step': 2827, 'epoch': 2} +{'type': 'loss', 'content': 0.0066430093720555305, 'timestamp': '2025-09-10 02:43:01.445440', 'step': 2828, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:43:01.497663', 'step': 2828, 'epoch': 2} +{'type': 'loss', 'content': 0.002757689217105508, 'timestamp': '2025-09-10 02:43:01.499936', 'step': 2829, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:43:01.554252', 'step': 2829, 'epoch': 2} +{'type': 'loss', 'content': 0.01923009753227234, 'timestamp': '2025-09-10 02:43:01.556463', 'step': 2830, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:43:01.609841', 'step': 2830, 'epoch': 2} +{'type': 'loss', 'content': 0.0152445612475276, 'timestamp': '2025-09-10 02:43:01.611921', 'step': 2831, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:43:01.664769', 'step': 2831, 'epoch': 2} +{'type': 'loss', 'content': 0.026830332353711128, 'timestamp': '2025-09-10 02:43:01.670665', 'step': 2832, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:43:01.723139', 'step': 2832, 'epoch': 2} +{'type': 'loss', 'content': 0.018680280074477196, 'timestamp': '2025-09-10 02:43:01.725242', 'step': 2833, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:43:01.777902', 'step': 2833, 'epoch': 2} +{'type': 'loss', 'content': 0.007404194213449955, 'timestamp': '2025-09-10 02:43:01.780418', 'step': 2834, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:43:01.832885', 'step': 2834, 'epoch': 2} +{'type': 'loss', 'content': 0.023370621725916862, 'timestamp': '2025-09-10 02:43:01.836092', 'step': 2835, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:43:01.889454', 'step': 2835, 'epoch': 2} +{'type': 'loss', 'content': 0.015749046579003334, 'timestamp': '2025-09-10 02:43:01.897009', 'step': 2836, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:43:01.949050', 'step': 2836, 'epoch': 2} +{'type': 'loss', 'content': 0.0016447792295366526, 'timestamp': '2025-09-10 02:43:01.951174', 'step': 2837, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:43:02.004004', 'step': 2837, 'epoch': 2} +{'type': 'loss', 'content': 0.0069174072705209255, 'timestamp': '2025-09-10 02:43:02.006141', 'step': 2838, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:43:02.058649', 'step': 2838, 'epoch': 2} +{'type': 'loss', 'content': 0.0037390643265098333, 'timestamp': '2025-09-10 02:43:02.062924', 'step': 2839, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:43:02.116690', 'step': 2839, 'epoch': 2} +{'type': 'loss', 'content': 0.00888837967067957, 'timestamp': '2025-09-10 02:43:02.122647', 'step': 2840, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:43:02.174790', 'step': 2840, 'epoch': 2} +{'type': 'loss', 'content': 0.004037219565361738, 'timestamp': '2025-09-10 02:43:02.181377', 'step': 2841, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:43:02.234740', 'step': 2841, 'epoch': 2} +{'type': 'loss', 'content': 0.026061559095978737, 'timestamp': '2025-09-10 02:43:02.236837', 'step': 2842, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-09-10 02:43:02.303534', 'step': 2842, 'epoch': 2} +{'type': 'loss', 'content': 0.011404995806515217, 'timestamp': '2025-09-10 02:43:02.315799', 'step': 2843, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:43:02.371015', 'step': 2843, 'epoch': 2} +{'type': 'loss', 'content': 0.0029660488944500685, 'timestamp': '2025-09-10 02:43:02.376829', 'step': 2844, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:43:02.431660', 'step': 2844, 'epoch': 2} +{'type': 'loss', 'content': 0.038797635585069656, 'timestamp': '2025-09-10 02:43:02.434272', 'step': 2845, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:43:02.491686', 'step': 2845, 'epoch': 2} +{'type': 'loss', 'content': 0.024347158148884773, 'timestamp': '2025-09-10 02:43:02.494670', 'step': 2846, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-09-10 02:43:02.566152', 'step': 2846, 'epoch': 2} +{'type': 'loss', 'content': 0.032168835401535034, 'timestamp': '2025-09-10 02:43:02.578822', 'step': 2847, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:43:02.633020', 'step': 2847, 'epoch': 2} +{'type': 'loss', 'content': 0.007962683215737343, 'timestamp': '2025-09-10 02:43:02.640291', 'step': 2848, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:43:02.699124', 'step': 2848, 'epoch': 2} +{'type': 'loss', 'content': 0.005333451088517904, 'timestamp': '2025-09-10 02:43:02.710673', 'step': 2849, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:43:02.764411', 'step': 2849, 'epoch': 2} +{'type': 'loss', 'content': 0.006212006323039532, 'timestamp': '2025-09-10 02:43:02.767492', 'step': 2850, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:43:02.820589', 'step': 2850, 'epoch': 2} +{'type': 'loss', 'content': 0.004719972610473633, 'timestamp': '2025-09-10 02:43:02.822852', 'step': 2851, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:43:02.875665', 'step': 2851, 'epoch': 2} +{'type': 'loss', 'content': 0.008301042951643467, 'timestamp': '2025-09-10 02:43:02.881618', 'step': 2852, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:43:02.939941', 'step': 2852, 'epoch': 2} +{'type': 'loss', 'content': 0.013020007871091366, 'timestamp': '2025-09-10 02:43:02.941777', 'step': 2853, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:43:02.993953', 'step': 2853, 'epoch': 2} +{'type': 'loss', 'content': 0.010510066524147987, 'timestamp': '2025-09-10 02:43:02.996025', 'step': 2854, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-09-10 02:43:03.063923', 'step': 2854, 'epoch': 2} +{'type': 'loss', 'content': 0.00926880817860365, 'timestamp': '2025-09-10 02:43:03.076390', 'step': 2855, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:43:03.129070', 'step': 2855, 'epoch': 2} +{'type': 'loss', 'content': 0.003856593044474721, 'timestamp': '2025-09-10 02:43:03.135143', 'step': 2856, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:43:03.187409', 'step': 2856, 'epoch': 2} +{'type': 'loss', 'content': 0.015365725383162498, 'timestamp': '2025-09-10 02:43:03.189891', 'step': 2857, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:43:03.243437', 'step': 2857, 'epoch': 2} +{'type': 'loss', 'content': 0.004318633582442999, 'timestamp': '2025-09-10 02:43:03.245955', 'step': 2858, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:43:03.299576', 'step': 2858, 'epoch': 2} +{'type': 'loss', 'content': 0.0041367243975400925, 'timestamp': '2025-09-10 02:43:03.305378', 'step': 2859, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:43:03.358983', 'step': 2859, 'epoch': 2} +{'type': 'loss', 'content': 0.023787083104252815, 'timestamp': '2025-09-10 02:43:03.365194', 'step': 2860, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:43:03.418386', 'step': 2860, 'epoch': 2} +{'type': 'loss', 'content': 0.018490174785256386, 'timestamp': '2025-09-10 02:43:03.424581', 'step': 2861, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-09-10 02:43:03.485934', 'step': 2861, 'epoch': 2} +{'type': 'loss', 'content': 0.008858558721840382, 'timestamp': '2025-09-10 02:43:03.496844', 'step': 2862, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:43:03.550325', 'step': 2862, 'epoch': 2} +{'type': 'loss', 'content': 0.026881933212280273, 'timestamp': '2025-09-10 02:43:03.553214', 'step': 2863, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:43:03.605954', 'step': 2863, 'epoch': 2} +{'type': 'loss', 'content': 0.02366648241877556, 'timestamp': '2025-09-10 02:43:03.611823', 'step': 2864, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:43:03.663736', 'step': 2864, 'epoch': 2} +{'type': 'loss', 'content': 0.005855969153344631, 'timestamp': '2025-09-10 02:43:03.666741', 'step': 2865, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:43:03.720641', 'step': 2865, 'epoch': 2} +{'type': 'loss', 'content': 0.004256942309439182, 'timestamp': '2025-09-10 02:43:03.730228', 'step': 2866, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:43:03.784292', 'step': 2866, 'epoch': 2} +{'type': 'loss', 'content': 0.015499325469136238, 'timestamp': '2025-09-10 02:43:03.794071', 'step': 2867, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-09-10 02:43:03.855182', 'step': 2867, 'epoch': 2} +{'type': 'loss', 'content': 0.00483601214364171, 'timestamp': '2025-09-10 02:43:03.866868', 'step': 2868, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:43:03.919628', 'step': 2868, 'epoch': 2} +{'type': 'loss', 'content': 0.022214515134692192, 'timestamp': '2025-09-10 02:43:03.926086', 'step': 2869, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:43:03.980371', 'step': 2869, 'epoch': 2} +{'type': 'loss', 'content': 0.0195834469050169, 'timestamp': '2025-09-10 02:43:03.982799', 'step': 2870, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-09-10 02:43:04.050282', 'step': 2870, 'epoch': 2} +{'type': 'loss', 'content': 0.019800430163741112, 'timestamp': '2025-09-10 02:43:04.062831', 'step': 2871, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:43:04.116345', 'step': 2871, 'epoch': 2} +{'type': 'loss', 'content': 0.007477868348360062, 'timestamp': '2025-09-10 02:43:04.122178', 'step': 2872, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:43:04.181583', 'step': 2872, 'epoch': 2} +{'type': 'loss', 'content': 0.007802496198564768, 'timestamp': '2025-09-10 02:43:04.193147', 'step': 2873, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:43:04.251165', 'step': 2873, 'epoch': 2} +{'type': 'loss', 'content': 0.03706588223576546, 'timestamp': '2025-09-10 02:43:04.261599', 'step': 2874, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:43:04.314610', 'step': 2874, 'epoch': 2} +{'type': 'loss', 'content': 0.0134931905195117, 'timestamp': '2025-09-10 02:43:04.316784', 'step': 2875, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:43:04.370048', 'step': 2875, 'epoch': 2} +{'type': 'loss', 'content': 0.014370128512382507, 'timestamp': '2025-09-10 02:43:04.379148', 'step': 2876, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:43:04.431493', 'step': 2876, 'epoch': 2} +{'type': 'loss', 'content': 0.015962282195687294, 'timestamp': '2025-09-10 02:43:04.433854', 'step': 2877, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:43:04.487114', 'step': 2877, 'epoch': 2} +{'type': 'loss', 'content': 0.006657306104898453, 'timestamp': '2025-09-10 02:43:04.489333', 'step': 2878, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:43:04.542139', 'step': 2878, 'epoch': 2} +{'type': 'loss', 'content': 0.003941268660128117, 'timestamp': '2025-09-10 02:43:04.544195', 'step': 2879, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:43:04.597904', 'step': 2879, 'epoch': 2} +{'type': 'loss', 'content': 0.01750280149281025, 'timestamp': '2025-09-10 02:43:04.608294', 'step': 2880, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:43:04.660779', 'step': 2880, 'epoch': 2} +{'type': 'loss', 'content': 0.005533973220735788, 'timestamp': '2025-09-10 02:43:04.663667', 'step': 2881, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:43:04.715937', 'step': 2881, 'epoch': 2} +{'type': 'loss', 'content': 0.011847132816910744, 'timestamp': '2025-09-10 02:43:04.718035', 'step': 2882, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:43:04.770519', 'step': 2882, 'epoch': 2} +{'type': 'loss', 'content': 0.017021920531988144, 'timestamp': '2025-09-10 02:43:04.773599', 'step': 2883, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:43:04.826530', 'step': 2883, 'epoch': 2} +{'type': 'loss', 'content': 0.012416626326739788, 'timestamp': '2025-09-10 02:43:04.832148', 'step': 2884, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:43:04.884146', 'step': 2884, 'epoch': 2} +{'type': 'loss', 'content': 0.01057855598628521, 'timestamp': '2025-09-10 02:43:04.890936', 'step': 2885, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:43:04.943546', 'step': 2885, 'epoch': 2} +{'type': 'loss', 'content': 0.0014162680599838495, 'timestamp': '2025-09-10 02:43:04.945553', 'step': 2886, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:43:04.997702', 'step': 2886, 'epoch': 2} +{'type': 'loss', 'content': 0.018539367243647575, 'timestamp': '2025-09-10 02:43:05.000751', 'step': 2887, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:43:05.053755', 'step': 2887, 'epoch': 2} +{'type': 'loss', 'content': 0.022198939695954323, 'timestamp': '2025-09-10 02:43:05.064139', 'step': 2888, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:43:05.115939', 'step': 2888, 'epoch': 2} +{'type': 'loss', 'content': 0.011367526836693287, 'timestamp': '2025-09-10 02:43:05.119000', 'step': 2889, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 480], 'flops': 9600058345344.0}, 'timestamp': '2025-09-10 02:43:05.192528', 'step': 2889, 'epoch': 2} +{'type': 'loss', 'content': 0.008567769080400467, 'timestamp': '2025-09-10 02:43:05.206219', 'step': 2890, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:43:05.258998', 'step': 2890, 'epoch': 2} +{'type': 'loss', 'content': 0.026089565828442574, 'timestamp': '2025-09-10 02:43:05.261106', 'step': 2891, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:43:05.313952', 'step': 2891, 'epoch': 2} +{'type': 'loss', 'content': 0.008742443285882473, 'timestamp': '2025-09-10 02:43:05.319796', 'step': 2892, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 688], 'flops': 13760083599040.0}, 'timestamp': '2025-09-10 02:43:05.417134', 'step': 2892, 'epoch': 2} +{'type': 'loss', 'content': 0.0068065267987549305, 'timestamp': '2025-09-10 02:43:05.438144', 'step': 2893, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:43:05.491057', 'step': 2893, 'epoch': 2} +{'type': 'loss', 'content': 0.0024250969290733337, 'timestamp': '2025-09-10 02:43:05.493166', 'step': 2894, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:43:05.545641', 'step': 2894, 'epoch': 2} +{'type': 'loss', 'content': 0.006088267546147108, 'timestamp': '2025-09-10 02:43:05.548762', 'step': 2895, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:43:05.601537', 'step': 2895, 'epoch': 2} +{'type': 'loss', 'content': 0.008796818554401398, 'timestamp': '2025-09-10 02:43:05.607140', 'step': 2896, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:43:05.659137', 'step': 2896, 'epoch': 2} +{'type': 'loss', 'content': 0.007444328628480434, 'timestamp': '2025-09-10 02:43:05.661115', 'step': 2897, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:43:05.714345', 'step': 2897, 'epoch': 2} +{'type': 'loss', 'content': 0.006030111573636532, 'timestamp': '2025-09-10 02:43:05.723961', 'step': 2898, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:43:05.776696', 'step': 2898, 'epoch': 2} +{'type': 'loss', 'content': 0.0022526364773511887, 'timestamp': '2025-09-10 02:43:05.779904', 'step': 2899, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:43:05.833221', 'step': 2899, 'epoch': 2} +{'type': 'loss', 'content': 0.01635793223977089, 'timestamp': '2025-09-10 02:43:05.838829', 'step': 2900, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-09-10 02:43:05.903235', 'step': 2900, 'epoch': 2} +{'type': 'loss', 'content': 0.01230801921337843, 'timestamp': '2025-09-10 02:43:05.916497', 'step': 2901, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:43:05.969267', 'step': 2901, 'epoch': 2} +{'type': 'loss', 'content': 0.028225775808095932, 'timestamp': '2025-09-10 02:43:05.971563', 'step': 2902, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 560], 'flops': 11200068058304.0}, 'timestamp': '2025-09-10 02:43:06.054100', 'step': 2902, 'epoch': 2} +{'type': 'loss', 'content': 0.0033608644735068083, 'timestamp': '2025-09-10 02:43:06.069544', 'step': 2903, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:43:06.122906', 'step': 2903, 'epoch': 2} +{'type': 'loss', 'content': 0.0027830079197883606, 'timestamp': '2025-09-10 02:43:06.128539', 'step': 2904, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:43:06.180541', 'step': 2904, 'epoch': 2} +{'type': 'loss', 'content': 0.005439094267785549, 'timestamp': '2025-09-10 02:43:06.187374', 'step': 2905, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:43:06.240602', 'step': 2905, 'epoch': 2} +{'type': 'loss', 'content': 0.01805257238447666, 'timestamp': '2025-09-10 02:43:06.242929', 'step': 2906, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:43:06.295456', 'step': 2906, 'epoch': 2} +{'type': 'loss', 'content': 0.010302051901817322, 'timestamp': '2025-09-10 02:43:06.297715', 'step': 2907, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:43:06.351825', 'step': 2907, 'epoch': 2} +{'type': 'loss', 'content': 0.005836172960698605, 'timestamp': '2025-09-10 02:43:06.362416', 'step': 2908, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:43:06.414883', 'step': 2908, 'epoch': 2} +{'type': 'loss', 'content': 0.0068815648555755615, 'timestamp': '2025-09-10 02:43:06.425141', 'step': 2909, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:43:06.477885', 'step': 2909, 'epoch': 2} +{'type': 'loss', 'content': 0.003785443725064397, 'timestamp': '2025-09-10 02:43:06.479909', 'step': 2910, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:43:06.533054', 'step': 2910, 'epoch': 2} +{'type': 'loss', 'content': 0.021662486717104912, 'timestamp': '2025-09-10 02:43:06.542643', 'step': 2911, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:43:06.595728', 'step': 2911, 'epoch': 2} +{'type': 'loss', 'content': 0.0022201475221663713, 'timestamp': '2025-09-10 02:43:06.601527', 'step': 2912, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:43:06.654111', 'step': 2912, 'epoch': 2} +{'type': 'loss', 'content': 0.0036644372157752514, 'timestamp': '2025-09-10 02:43:06.656375', 'step': 2913, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:43:06.708778', 'step': 2913, 'epoch': 2} +{'type': 'loss', 'content': 0.0043023936450481415, 'timestamp': '2025-09-10 02:43:06.710805', 'step': 2914, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-09-10 02:43:06.772647', 'step': 2914, 'epoch': 2} +{'type': 'loss', 'content': 0.0011631603119894862, 'timestamp': '2025-09-10 02:43:06.783744', 'step': 2915, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:43:06.836460', 'step': 2915, 'epoch': 2} +{'type': 'loss', 'content': 0.015948059037327766, 'timestamp': '2025-09-10 02:43:06.842096', 'step': 2916, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:43:06.893801', 'step': 2916, 'epoch': 2} +{'type': 'loss', 'content': 0.022618483752012253, 'timestamp': '2025-09-10 02:43:06.895957', 'step': 2917, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:43:06.948363', 'step': 2917, 'epoch': 2} +{'type': 'loss', 'content': 0.023323001340031624, 'timestamp': '2025-09-10 02:43:06.950387', 'step': 2918, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:43:07.004055', 'step': 2918, 'epoch': 2} +{'type': 'loss', 'content': 0.011524630710482597, 'timestamp': '2025-09-10 02:43:07.013672', 'step': 2919, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-09-10 02:43:07.074388', 'step': 2919, 'epoch': 2} +{'type': 'loss', 'content': 0.0015407305909320712, 'timestamp': '2025-09-10 02:43:07.086086', 'step': 2920, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:43:07.137786', 'step': 2920, 'epoch': 2} +{'type': 'loss', 'content': 0.03542738035321236, 'timestamp': '2025-09-10 02:43:07.139892', 'step': 2921, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:43:07.192482', 'step': 2921, 'epoch': 2} +{'type': 'loss', 'content': 0.0011894494527950883, 'timestamp': '2025-09-10 02:43:07.194645', 'step': 2922, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:43:07.247252', 'step': 2922, 'epoch': 2} +{'type': 'loss', 'content': 0.007411637343466282, 'timestamp': '2025-09-10 02:43:07.255381', 'step': 2923, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:43:07.308235', 'step': 2923, 'epoch': 2} +{'type': 'loss', 'content': 0.002640027552843094, 'timestamp': '2025-09-10 02:43:07.313807', 'step': 2924, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:43:07.365289', 'step': 2924, 'epoch': 2} +{'type': 'loss', 'content': 0.005221573170274496, 'timestamp': '2025-09-10 02:43:07.367536', 'step': 2925, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:43:07.419874', 'step': 2925, 'epoch': 2} +{'type': 'loss', 'content': 0.010517584159970284, 'timestamp': '2025-09-10 02:43:07.422846', 'step': 2926, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:43:07.475887', 'step': 2926, 'epoch': 2} +{'type': 'loss', 'content': 0.005084399599581957, 'timestamp': '2025-09-10 02:43:07.482402', 'step': 2927, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:43:07.534942', 'step': 2927, 'epoch': 2} +{'type': 'loss', 'content': 0.02717072144150734, 'timestamp': '2025-09-10 02:43:07.540532', 'step': 2928, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:43:07.592832', 'step': 2928, 'epoch': 2} +{'type': 'loss', 'content': 0.02197488769888878, 'timestamp': '2025-09-10 02:43:07.594982', 'step': 2929, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 464], 'flops': 9280056402752.0}, 'timestamp': '2025-09-10 02:43:07.667028', 'step': 2929, 'epoch': 2} +{'type': 'loss', 'content': 0.008209539577364922, 'timestamp': '2025-09-10 02:43:07.680518', 'step': 2930, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:43:07.735665', 'step': 2930, 'epoch': 2} +{'type': 'loss', 'content': 0.015388053841888905, 'timestamp': '2025-09-10 02:43:07.745495', 'step': 2931, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:43:07.798293', 'step': 2931, 'epoch': 2} +{'type': 'loss', 'content': 0.02117818407714367, 'timestamp': '2025-09-10 02:43:07.804138', 'step': 2932, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:43:07.856059', 'step': 2932, 'epoch': 2} +{'type': 'loss', 'content': 0.018351761624217033, 'timestamp': '2025-09-10 02:43:07.858291', 'step': 2933, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:43:07.911138', 'step': 2933, 'epoch': 2} +{'type': 'loss', 'content': 0.005453685764223337, 'timestamp': '2025-09-10 02:43:07.919621', 'step': 2934, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:43:07.977902', 'step': 2934, 'epoch': 2} +{'type': 'loss', 'content': 0.03870873898267746, 'timestamp': '2025-09-10 02:43:07.988274', 'step': 2935, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:43:08.041268', 'step': 2935, 'epoch': 2} +{'type': 'loss', 'content': 0.004993564449250698, 'timestamp': '2025-09-10 02:43:08.046929', 'step': 2936, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:43:08.099289', 'step': 2936, 'epoch': 2} +{'type': 'loss', 'content': 0.0022162613458931446, 'timestamp': '2025-09-10 02:43:08.105883', 'step': 2937, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:43:08.158771', 'step': 2937, 'epoch': 2} +{'type': 'loss', 'content': 0.011630414053797722, 'timestamp': '2025-09-10 02:43:08.162009', 'step': 2938, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:43:08.215042', 'step': 2938, 'epoch': 2} +{'type': 'loss', 'content': 0.05587531253695488, 'timestamp': '2025-09-10 02:43:08.217140', 'step': 2939, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:43:08.269605', 'step': 2939, 'epoch': 2} +{'type': 'loss', 'content': 0.02484690584242344, 'timestamp': '2025-09-10 02:43:08.275517', 'step': 2940, 'epoch': 2} +{'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 736], 'batch_size': 8, 'flops': 14691612894976}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10859018244352}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 624], 'batch_size': 8, 'flops': 12455932682112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11497784019456}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8623338031488}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8623338031488}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10859018244352}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 656], 'batch_size': 8, 'flops': 13094698457216}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 560], 'batch_size': 8, 'flops': 11178401131904}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11497784019456}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 1168], 'batch_size': 8, 'flops': 23314950858880}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 640], 'batch_size': 8, 'flops': 12775315569664}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [6, 208], 'batch_size': 8, 'flops': 4151977605760}], 'timestamp': '2025-09-10 02:43:25.110434', 'step': 2940, 'epoch': 2} +{'type': 'pplx', 'content': 21483309.118461803, 'timestamp': '2025-09-10 02:43:25.114056', 'step': 2940, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:43:25.167624', 'step': 2940, 'epoch': 2} +{'type': 'loss', 'content': 0.008945746347308159, 'timestamp': '2025-09-10 02:43:25.176271', 'step': 2941, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:43:25.229778', 'step': 2941, 'epoch': 2} +{'type': 'loss', 'content': 0.0010575311025604606, 'timestamp': '2025-09-10 02:43:25.231925', 'step': 2942, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:43:25.285072', 'step': 2942, 'epoch': 2} +{'type': 'loss', 'content': 0.017733285203576088, 'timestamp': '2025-09-10 02:43:25.292965', 'step': 2943, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:43:25.345799', 'step': 2943, 'epoch': 2} +{'type': 'loss', 'content': 0.020000610500574112, 'timestamp': '2025-09-10 02:43:25.351792', 'step': 2944, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:43:25.404334', 'step': 2944, 'epoch': 2} +{'type': 'loss', 'content': 0.0038425668608397245, 'timestamp': '2025-09-10 02:43:25.407181', 'step': 2945, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 816], 'flops': 16320099139776.0}, 'timestamp': '2025-09-10 02:43:25.525936', 'step': 2945, 'epoch': 2} +{'type': 'loss', 'content': 0.0013542547821998596, 'timestamp': '2025-09-10 02:43:25.548897', 'step': 2946, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:43:25.602309', 'step': 2946, 'epoch': 2} +{'type': 'loss', 'content': 0.012076723389327526, 'timestamp': '2025-09-10 02:43:25.605144', 'step': 2947, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:43:25.657775', 'step': 2947, 'epoch': 2} +{'type': 'loss', 'content': 0.0017612642841413617, 'timestamp': '2025-09-10 02:43:25.665260', 'step': 2948, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:43:25.717887', 'step': 2948, 'epoch': 2} +{'type': 'loss', 'content': 0.016427170485258102, 'timestamp': '2025-09-10 02:43:25.727924', 'step': 2949, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:43:25.781393', 'step': 2949, 'epoch': 2} +{'type': 'loss', 'content': 0.0007146573625504971, 'timestamp': '2025-09-10 02:43:25.783612', 'step': 2950, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:43:25.836724', 'step': 2950, 'epoch': 2} +{'type': 'loss', 'content': 0.00922436360269785, 'timestamp': '2025-09-10 02:43:25.839532', 'step': 2951, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:43:25.892469', 'step': 2951, 'epoch': 2} +{'type': 'loss', 'content': 0.013483921065926552, 'timestamp': '2025-09-10 02:43:25.898357', 'step': 2952, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-09-10 02:43:25.958858', 'step': 2952, 'epoch': 2} +{'type': 'loss', 'content': 0.007663012947887182, 'timestamp': '2025-09-10 02:43:25.970632', 'step': 2953, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:43:26.023987', 'step': 2953, 'epoch': 2} +{'type': 'loss', 'content': 0.013566250912845135, 'timestamp': '2025-09-10 02:43:26.026185', 'step': 2954, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:43:26.079225', 'step': 2954, 'epoch': 2} +{'type': 'loss', 'content': 0.026658134534955025, 'timestamp': '2025-09-10 02:43:26.081360', 'step': 2955, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:43:26.133781', 'step': 2955, 'epoch': 2} +{'type': 'loss', 'content': 0.004819025285542011, 'timestamp': '2025-09-10 02:43:26.139451', 'step': 2956, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:43:26.192040', 'step': 2956, 'epoch': 2} +{'type': 'loss', 'content': 0.018547430634498596, 'timestamp': '2025-09-10 02:43:26.193957', 'step': 2957, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:43:26.247436', 'step': 2957, 'epoch': 2} +{'type': 'loss', 'content': 0.003229897003620863, 'timestamp': '2025-09-10 02:43:26.249411', 'step': 2958, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:43:26.302574', 'step': 2958, 'epoch': 2} +{'type': 'loss', 'content': 0.017530253157019615, 'timestamp': '2025-09-10 02:43:26.304676', 'step': 2959, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:43:26.357863', 'step': 2959, 'epoch': 2} +{'type': 'loss', 'content': 0.0028641356620937586, 'timestamp': '2025-09-10 02:43:26.368201', 'step': 2960, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:43:26.420948', 'step': 2960, 'epoch': 2} +{'type': 'loss', 'content': 0.02891319803893566, 'timestamp': '2025-09-10 02:43:26.431366', 'step': 2961, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-09-10 02:43:26.492639', 'step': 2961, 'epoch': 2} +{'type': 'loss', 'content': 0.020257892087101936, 'timestamp': '2025-09-10 02:43:26.503518', 'step': 2962, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:43:26.556643', 'step': 2962, 'epoch': 2} +{'type': 'loss', 'content': 0.0009795842925086617, 'timestamp': '2025-09-10 02:43:26.558940', 'step': 2963, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:43:26.611897', 'step': 2963, 'epoch': 2} +{'type': 'loss', 'content': 0.008834127336740494, 'timestamp': '2025-09-10 02:43:26.620767', 'step': 2964, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:43:26.673497', 'step': 2964, 'epoch': 2} +{'type': 'loss', 'content': 0.00043440479203127325, 'timestamp': '2025-09-10 02:43:26.675972', 'step': 2965, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:43:26.728566', 'step': 2965, 'epoch': 2} +{'type': 'loss', 'content': 0.0021795372013002634, 'timestamp': '2025-09-10 02:43:26.730892', 'step': 2966, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:43:26.784336', 'step': 2966, 'epoch': 2} +{'type': 'loss', 'content': 0.0014383898815140128, 'timestamp': '2025-09-10 02:43:26.786683', 'step': 2967, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:43:26.839298', 'step': 2967, 'epoch': 2} +{'type': 'loss', 'content': 0.009421980008482933, 'timestamp': '2025-09-10 02:43:26.845409', 'step': 2968, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:43:26.897998', 'step': 2968, 'epoch': 2} +{'type': 'loss', 'content': 0.011221504770219326, 'timestamp': '2025-09-10 02:43:26.904561', 'step': 2969, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:43:26.962882', 'step': 2969, 'epoch': 2} +{'type': 'loss', 'content': 0.0034413428511470556, 'timestamp': '2025-09-10 02:43:26.973354', 'step': 2970, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:43:27.026661', 'step': 2970, 'epoch': 2} +{'type': 'loss', 'content': 0.001014295150525868, 'timestamp': '2025-09-10 02:43:27.036297', 'step': 2971, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:43:27.089437', 'step': 2971, 'epoch': 2} +{'type': 'loss', 'content': 0.013005608692765236, 'timestamp': '2025-09-10 02:43:27.095241', 'step': 2972, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:43:27.153946', 'step': 2972, 'epoch': 2} +{'type': 'loss', 'content': 0.0037272910121828318, 'timestamp': '2025-09-10 02:43:27.165460', 'step': 2973, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:43:27.218882', 'step': 2973, 'epoch': 2} +{'type': 'loss', 'content': 0.01542600803077221, 'timestamp': '2025-09-10 02:43:27.221114', 'step': 2974, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:43:27.274092', 'step': 2974, 'epoch': 2} +{'type': 'loss', 'content': 0.0008530435152351856, 'timestamp': '2025-09-10 02:43:27.276210', 'step': 2975, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:43:27.328501', 'step': 2975, 'epoch': 2} +{'type': 'loss', 'content': 0.025617094710469246, 'timestamp': '2025-09-10 02:43:27.334339', 'step': 2976, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:43:27.386669', 'step': 2976, 'epoch': 2} +{'type': 'loss', 'content': 0.0015901281731203198, 'timestamp': '2025-09-10 02:43:27.393268', 'step': 2977, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:43:27.446658', 'step': 2977, 'epoch': 2} +{'type': 'loss', 'content': 0.009493803605437279, 'timestamp': '2025-09-10 02:43:27.454912', 'step': 2978, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:43:27.508045', 'step': 2978, 'epoch': 2} +{'type': 'loss', 'content': 0.0005454585188999772, 'timestamp': '2025-09-10 02:43:27.510304', 'step': 2979, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:43:27.562864', 'step': 2979, 'epoch': 2} +{'type': 'loss', 'content': 0.009057528339326382, 'timestamp': '2025-09-10 02:43:27.568763', 'step': 2980, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:43:27.620268', 'step': 2980, 'epoch': 2} +{'type': 'loss', 'content': 0.001526238163933158, 'timestamp': '2025-09-10 02:43:27.622443', 'step': 2981, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:43:27.674860', 'step': 2981, 'epoch': 2} +{'type': 'loss', 'content': 0.034560464322566986, 'timestamp': '2025-09-10 02:43:27.677102', 'step': 2982, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:43:27.729589', 'step': 2982, 'epoch': 2} +{'type': 'loss', 'content': 0.003290567547082901, 'timestamp': '2025-09-10 02:43:27.731812', 'step': 2983, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:43:27.784023', 'step': 2983, 'epoch': 2} +{'type': 'loss', 'content': 0.0036186235956847668, 'timestamp': '2025-09-10 02:43:27.789747', 'step': 2984, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:43:27.841717', 'step': 2984, 'epoch': 2} +{'type': 'loss', 'content': 0.00348859466612339, 'timestamp': '2025-09-10 02:43:27.848233', 'step': 2985, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:43:27.900963', 'step': 2985, 'epoch': 2} +{'type': 'loss', 'content': 0.003103638533502817, 'timestamp': '2025-09-10 02:43:27.903494', 'step': 2986, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:43:27.957033', 'step': 2986, 'epoch': 2} +{'type': 'loss', 'content': 0.010334305465221405, 'timestamp': '2025-09-10 02:43:27.966688', 'step': 2987, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:43:28.019562', 'step': 2987, 'epoch': 2} +{'type': 'loss', 'content': 0.009181870147585869, 'timestamp': '2025-09-10 02:43:28.025110', 'step': 2988, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:43:28.077095', 'step': 2988, 'epoch': 2} +{'type': 'loss', 'content': 0.002298856619745493, 'timestamp': '2025-09-10 02:43:28.080139', 'step': 2989, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:43:28.132786', 'step': 2989, 'epoch': 2} +{'type': 'loss', 'content': 0.0030902244616299868, 'timestamp': '2025-09-10 02:43:28.135066', 'step': 2990, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:43:28.187694', 'step': 2990, 'epoch': 2} +{'type': 'loss', 'content': 0.016290295869112015, 'timestamp': '2025-09-10 02:43:28.189886', 'step': 2991, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:43:28.242487', 'step': 2991, 'epoch': 2} +{'type': 'loss', 'content': 0.031161842867732048, 'timestamp': '2025-09-10 02:43:28.248035', 'step': 2992, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:43:28.300029', 'step': 2992, 'epoch': 2} +{'type': 'loss', 'content': 0.004563372116535902, 'timestamp': '2025-09-10 02:43:28.310251', 'step': 2993, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:43:28.363348', 'step': 2993, 'epoch': 2} +{'type': 'loss', 'content': 0.020457947626709938, 'timestamp': '2025-09-10 02:43:28.365532', 'step': 2994, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:43:28.418500', 'step': 2994, 'epoch': 2} +{'type': 'loss', 'content': 0.016411827877163887, 'timestamp': '2025-09-10 02:43:28.420537', 'step': 2995, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:43:28.473257', 'step': 2995, 'epoch': 2} +{'type': 'loss', 'content': 0.003593911649659276, 'timestamp': '2025-09-10 02:43:28.479031', 'step': 2996, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:43:28.532369', 'step': 2996, 'epoch': 2} +{'type': 'loss', 'content': 0.003384404582902789, 'timestamp': '2025-09-10 02:43:28.542849', 'step': 2997, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:43:28.597887', 'step': 2997, 'epoch': 2} +{'type': 'loss', 'content': 0.026975279673933983, 'timestamp': '2025-09-10 02:43:28.607653', 'step': 2998, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:43:28.661022', 'step': 2998, 'epoch': 2} +{'type': 'loss', 'content': 0.019041141495108604, 'timestamp': '2025-09-10 02:43:28.663132', 'step': 2999, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:43:28.716219', 'step': 2999, 'epoch': 2} +{'type': 'loss', 'content': 0.01516516599804163, 'timestamp': '2025-09-10 02:43:28.722091', 'step': 3000, 'epoch': 2} +{'type': 'info', 'content': 'Checkpoint saved at step 3000', 'timestamp': '2025-09-10 02:43:29.135530', 'step': 3000, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:43:29.190119', 'step': 3000, 'epoch': 2} +{'type': 'loss', 'content': 0.021581441164016724, 'timestamp': '2025-09-10 02:43:29.197170', 'step': 3001, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:43:29.250756', 'step': 3001, 'epoch': 2} +{'type': 'loss', 'content': 0.012467102147638798, 'timestamp': '2025-09-10 02:43:29.252711', 'step': 3002, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:43:29.305695', 'step': 3002, 'epoch': 2} +{'type': 'loss', 'content': 0.00976636353880167, 'timestamp': '2025-09-10 02:43:29.308062', 'step': 3003, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:43:29.360893', 'step': 3003, 'epoch': 2} +{'type': 'loss', 'content': 0.009900376200675964, 'timestamp': '2025-09-10 02:43:29.366900', 'step': 3004, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:43:29.419313', 'step': 3004, 'epoch': 2} +{'type': 'loss', 'content': 0.0017990581691265106, 'timestamp': '2025-09-10 02:43:29.422209', 'step': 3005, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:43:29.474942', 'step': 3005, 'epoch': 2} +{'type': 'loss', 'content': 0.001193814561702311, 'timestamp': '2025-09-10 02:43:29.477148', 'step': 3006, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-09-10 02:43:29.543270', 'step': 3006, 'epoch': 2} +{'type': 'loss', 'content': 0.005569125525653362, 'timestamp': '2025-09-10 02:43:29.555493', 'step': 3007, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:43:29.608379', 'step': 3007, 'epoch': 2} +{'type': 'loss', 'content': 0.0012687196722254157, 'timestamp': '2025-09-10 02:43:29.614183', 'step': 3008, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:43:29.666281', 'step': 3008, 'epoch': 2} +{'type': 'loss', 'content': 0.02738204412162304, 'timestamp': '2025-09-10 02:43:29.669218', 'step': 3009, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-09-10 02:43:29.730388', 'step': 3009, 'epoch': 2} +{'type': 'loss', 'content': 0.017042160034179688, 'timestamp': '2025-09-10 02:43:29.741349', 'step': 3010, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:43:29.794784', 'step': 3010, 'epoch': 2} +{'type': 'loss', 'content': 0.024572748690843582, 'timestamp': '2025-09-10 02:43:29.797083', 'step': 3011, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:43:29.849592', 'step': 3011, 'epoch': 2} +{'type': 'loss', 'content': 0.01295087393373251, 'timestamp': '2025-09-10 02:43:29.856873', 'step': 3012, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:43:29.910152', 'step': 3012, 'epoch': 2} +{'type': 'loss', 'content': 0.0017828369745984674, 'timestamp': '2025-09-10 02:43:29.919335', 'step': 3013, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:43:29.973569', 'step': 3013, 'epoch': 2} +{'type': 'loss', 'content': 0.021940140053629875, 'timestamp': '2025-09-10 02:43:29.983341', 'step': 3014, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:43:30.036946', 'step': 3014, 'epoch': 2} +{'type': 'loss', 'content': 0.029555542394518852, 'timestamp': '2025-09-10 02:43:30.040337', 'step': 3015, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:43:30.093302', 'step': 3015, 'epoch': 2} +{'type': 'loss', 'content': 0.0069861263036727905, 'timestamp': '2025-09-10 02:43:30.099309', 'step': 3016, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:43:30.151226', 'step': 3016, 'epoch': 2} +{'type': 'loss', 'content': 0.03783723711967468, 'timestamp': '2025-09-10 02:43:30.154246', 'step': 3017, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:43:30.212382', 'step': 3017, 'epoch': 2} +{'type': 'loss', 'content': 0.014050469733774662, 'timestamp': '2025-09-10 02:43:30.214522', 'step': 3018, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:43:30.267609', 'step': 3018, 'epoch': 2} +{'type': 'loss', 'content': 0.0017659427830949426, 'timestamp': '2025-09-10 02:43:30.269914', 'step': 3019, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:43:30.323089', 'step': 3019, 'epoch': 2} +{'type': 'loss', 'content': 0.009280859492719173, 'timestamp': '2025-09-10 02:43:30.329020', 'step': 3020, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:43:30.393798', 'step': 3020, 'epoch': 2} +{'type': 'loss', 'content': 0.00999439973384142, 'timestamp': '2025-09-10 02:43:30.403774', 'step': 3021, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:43:30.458967', 'step': 3021, 'epoch': 2} +{'type': 'loss', 'content': 0.0006523417541757226, 'timestamp': '2025-09-10 02:43:30.464946', 'step': 3022, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:43:30.521783', 'step': 3022, 'epoch': 2} +{'type': 'loss', 'content': 0.010792514309287071, 'timestamp': '2025-09-10 02:43:30.523855', 'step': 3023, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:43:30.578078', 'step': 3023, 'epoch': 2} +{'type': 'loss', 'content': 0.042817894369363785, 'timestamp': '2025-09-10 02:43:30.586583', 'step': 3024, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:43:30.644695', 'step': 3024, 'epoch': 2} +{'type': 'loss', 'content': 0.0024986821226775646, 'timestamp': '2025-09-10 02:43:30.646978', 'step': 3025, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:43:30.699817', 'step': 3025, 'epoch': 2} +{'type': 'loss', 'content': 0.008581001311540604, 'timestamp': '2025-09-10 02:43:30.702097', 'step': 3026, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:43:30.754664', 'step': 3026, 'epoch': 2} +{'type': 'loss', 'content': 0.033836524933576584, 'timestamp': '2025-09-10 02:43:30.758201', 'step': 3027, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:43:30.812040', 'step': 3027, 'epoch': 2} +{'type': 'loss', 'content': 0.003424307331442833, 'timestamp': '2025-09-10 02:43:30.825697', 'step': 3028, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:43:30.879359', 'step': 3028, 'epoch': 2} +{'type': 'loss', 'content': 0.0014254000270739198, 'timestamp': '2025-09-10 02:43:30.885922', 'step': 3029, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:43:30.940311', 'step': 3029, 'epoch': 2} +{'type': 'loss', 'content': 0.016321172937750816, 'timestamp': '2025-09-10 02:43:30.942351', 'step': 3030, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-09-10 02:43:31.011323', 'step': 3030, 'epoch': 2} +{'type': 'loss', 'content': 0.046935852617025375, 'timestamp': '2025-09-10 02:43:31.024048', 'step': 3031, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:43:31.079782', 'step': 3031, 'epoch': 2} +{'type': 'loss', 'content': 0.012795096263289452, 'timestamp': '2025-09-10 02:43:31.085585', 'step': 3032, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:43:31.142970', 'step': 3032, 'epoch': 2} +{'type': 'loss', 'content': 0.0052894712425768375, 'timestamp': '2025-09-10 02:43:31.144960', 'step': 3033, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:43:31.207978', 'step': 3033, 'epoch': 2} +{'type': 'loss', 'content': 0.0034933737479150295, 'timestamp': '2025-09-10 02:43:31.212348', 'step': 3034, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:43:31.268244', 'step': 3034, 'epoch': 2} +{'type': 'loss', 'content': 0.024392152205109596, 'timestamp': '2025-09-10 02:43:31.276394', 'step': 3035, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:43:31.332729', 'step': 3035, 'epoch': 2} +{'type': 'loss', 'content': 0.007851941511034966, 'timestamp': '2025-09-10 02:43:31.338655', 'step': 3036, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:43:31.398572', 'step': 3036, 'epoch': 2} +{'type': 'loss', 'content': 0.009740768000483513, 'timestamp': '2025-09-10 02:43:31.410102', 'step': 3037, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:43:31.462894', 'step': 3037, 'epoch': 2} +{'type': 'loss', 'content': 0.0092580895870924, 'timestamp': '2025-09-10 02:43:31.465050', 'step': 3038, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:43:31.519301', 'step': 3038, 'epoch': 2} +{'type': 'loss', 'content': 0.00213812873698771, 'timestamp': '2025-09-10 02:43:31.522890', 'step': 3039, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:43:31.577935', 'step': 3039, 'epoch': 2} +{'type': 'loss', 'content': 0.020063860341906548, 'timestamp': '2025-09-10 02:43:31.583987', 'step': 3040, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:43:31.636011', 'step': 3040, 'epoch': 2} +{'type': 'loss', 'content': 0.006031975150108337, 'timestamp': '2025-09-10 02:43:31.638253', 'step': 3041, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-09-10 02:43:31.704842', 'step': 3041, 'epoch': 2} +{'type': 'loss', 'content': 0.018207212910056114, 'timestamp': '2025-09-10 02:43:31.717075', 'step': 3042, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:43:31.776423', 'step': 3042, 'epoch': 2} +{'type': 'loss', 'content': 0.016082150861620903, 'timestamp': '2025-09-10 02:43:31.780024', 'step': 3043, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:43:31.834293', 'step': 3043, 'epoch': 2} +{'type': 'loss', 'content': 0.013052371330559254, 'timestamp': '2025-09-10 02:43:31.840338', 'step': 3044, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:43:31.900986', 'step': 3044, 'epoch': 2} +{'type': 'loss', 'content': 0.016128752380609512, 'timestamp': '2025-09-10 02:43:31.903404', 'step': 3045, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:43:31.956565', 'step': 3045, 'epoch': 2} +{'type': 'loss', 'content': 0.01358273159712553, 'timestamp': '2025-09-10 02:43:31.958910', 'step': 3046, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:43:32.012499', 'step': 3046, 'epoch': 2} +{'type': 'loss', 'content': 0.0029387916438281536, 'timestamp': '2025-09-10 02:43:32.022110', 'step': 3047, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:43:32.076197', 'step': 3047, 'epoch': 2} +{'type': 'loss', 'content': 0.009799973107874393, 'timestamp': '2025-09-10 02:43:32.082147', 'step': 3048, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:43:32.134403', 'step': 3048, 'epoch': 2} +{'type': 'loss', 'content': 0.005152401048690081, 'timestamp': '2025-09-10 02:43:32.136778', 'step': 3049, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:43:32.189458', 'step': 3049, 'epoch': 2} +{'type': 'loss', 'content': 0.01178047340363264, 'timestamp': '2025-09-10 02:43:32.191898', 'step': 3050, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:43:32.244657', 'step': 3050, 'epoch': 2} +{'type': 'loss', 'content': 0.0021244531963020563, 'timestamp': '2025-09-10 02:43:32.247452', 'step': 3051, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:43:32.300595', 'step': 3051, 'epoch': 2} +{'type': 'loss', 'content': 0.004618776496499777, 'timestamp': '2025-09-10 02:43:32.307720', 'step': 3052, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-09-10 02:43:32.375034', 'step': 3052, 'epoch': 2} +{'type': 'loss', 'content': 0.014385617338120937, 'timestamp': '2025-09-10 02:43:32.388783', 'step': 3053, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:43:32.441947', 'step': 3053, 'epoch': 2} +{'type': 'loss', 'content': 0.005455045960843563, 'timestamp': '2025-09-10 02:43:32.449845', 'step': 3054, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:43:32.503765', 'step': 3054, 'epoch': 2} +{'type': 'loss', 'content': 0.0028096141759306192, 'timestamp': '2025-09-10 02:43:32.505934', 'step': 3055, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:43:32.558732', 'step': 3055, 'epoch': 2} +{'type': 'loss', 'content': 0.01378645095974207, 'timestamp': '2025-09-10 02:43:32.564709', 'step': 3056, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:43:32.616970', 'step': 3056, 'epoch': 2} +{'type': 'loss', 'content': 0.026725469157099724, 'timestamp': '2025-09-10 02:43:32.619260', 'step': 3057, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:43:32.674191', 'step': 3057, 'epoch': 2} +{'type': 'loss', 'content': 0.002781761111691594, 'timestamp': '2025-09-10 02:43:32.683932', 'step': 3058, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:43:32.737790', 'step': 3058, 'epoch': 2} +{'type': 'loss', 'content': 0.00698810676112771, 'timestamp': '2025-09-10 02:43:32.739839', 'step': 3059, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:43:32.793527', 'step': 3059, 'epoch': 2} +{'type': 'loss', 'content': 0.01064991019666195, 'timestamp': '2025-09-10 02:43:32.801733', 'step': 3060, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:43:32.854959', 'step': 3060, 'epoch': 2} +{'type': 'loss', 'content': 0.022440778091549873, 'timestamp': '2025-09-10 02:43:32.857261', 'step': 3061, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:43:32.912231', 'step': 3061, 'epoch': 2} +{'type': 'loss', 'content': 0.007607771549373865, 'timestamp': '2025-09-10 02:43:32.922054', 'step': 3062, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:43:32.975055', 'step': 3062, 'epoch': 2} +{'type': 'loss', 'content': 0.0037695816718041897, 'timestamp': '2025-09-10 02:43:32.977337', 'step': 3063, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:43:33.038566', 'step': 3063, 'epoch': 2} +{'type': 'loss', 'content': 0.0025553121231496334, 'timestamp': '2025-09-10 02:43:33.044242', 'step': 3064, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:43:33.100173', 'step': 3064, 'epoch': 2} +{'type': 'loss', 'content': 0.0130363954231143, 'timestamp': '2025-09-10 02:43:33.111409', 'step': 3065, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:43:33.164383', 'step': 3065, 'epoch': 2} +{'type': 'loss', 'content': 0.009316898882389069, 'timestamp': '2025-09-10 02:43:33.166577', 'step': 3066, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:43:33.219644', 'step': 3066, 'epoch': 2} +{'type': 'loss', 'content': 0.015726543962955475, 'timestamp': '2025-09-10 02:43:33.221895', 'step': 3067, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:43:33.279597', 'step': 3067, 'epoch': 2} +{'type': 'loss', 'content': 0.011039117351174355, 'timestamp': '2025-09-10 02:43:33.290770', 'step': 3068, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:43:33.343898', 'step': 3068, 'epoch': 2} +{'type': 'loss', 'content': 0.018892407417297363, 'timestamp': '2025-09-10 02:43:33.346167', 'step': 3069, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:43:33.398989', 'step': 3069, 'epoch': 2} +{'type': 'loss', 'content': 0.01331985741853714, 'timestamp': '2025-09-10 02:43:33.405118', 'step': 3070, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:43:33.458787', 'step': 3070, 'epoch': 2} +{'type': 'loss', 'content': 0.00435601407662034, 'timestamp': '2025-09-10 02:43:33.461021', 'step': 3071, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:43:33.513781', 'step': 3071, 'epoch': 2} +{'type': 'loss', 'content': 0.0037311671767383814, 'timestamp': '2025-09-10 02:43:33.519614', 'step': 3072, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:43:33.571982', 'step': 3072, 'epoch': 2} +{'type': 'loss', 'content': 0.012653851881623268, 'timestamp': '2025-09-10 02:43:33.578301', 'step': 3073, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:43:33.639183', 'step': 3073, 'epoch': 2} +{'type': 'loss', 'content': 0.001527549116872251, 'timestamp': '2025-09-10 02:43:33.649892', 'step': 3074, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:43:33.702982', 'step': 3074, 'epoch': 2} +{'type': 'loss', 'content': 0.00562877906486392, 'timestamp': '2025-09-10 02:43:33.711100', 'step': 3075, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-09-10 02:43:33.777943', 'step': 3075, 'epoch': 2} +{'type': 'loss', 'content': 0.016573479399085045, 'timestamp': '2025-09-10 02:43:33.790945', 'step': 3076, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:43:33.843771', 'step': 3076, 'epoch': 2} +{'type': 'loss', 'content': 0.0036496755201369524, 'timestamp': '2025-09-10 02:43:33.852265', 'step': 3077, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:43:33.905328', 'step': 3077, 'epoch': 2} +{'type': 'loss', 'content': 0.0029678826685994864, 'timestamp': '2025-09-10 02:43:33.907408', 'step': 3078, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:43:33.960337', 'step': 3078, 'epoch': 2} +{'type': 'loss', 'content': 0.011269161477684975, 'timestamp': '2025-09-10 02:43:33.968487', 'step': 3079, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:43:34.021507', 'step': 3079, 'epoch': 2} +{'type': 'loss', 'content': 0.005516494624316692, 'timestamp': '2025-09-10 02:43:34.027567', 'step': 3080, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:43:34.080162', 'step': 3080, 'epoch': 2} +{'type': 'loss', 'content': 0.007762841880321503, 'timestamp': '2025-09-10 02:43:34.082418', 'step': 3081, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:43:34.136858', 'step': 3081, 'epoch': 2} +{'type': 'loss', 'content': 0.00736891059204936, 'timestamp': '2025-09-10 02:43:34.139384', 'step': 3082, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:43:34.193981', 'step': 3082, 'epoch': 2} +{'type': 'loss', 'content': 0.0037586067337542772, 'timestamp': '2025-09-10 02:43:34.196395', 'step': 3083, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:43:34.249866', 'step': 3083, 'epoch': 2} +{'type': 'loss', 'content': 0.010520155541598797, 'timestamp': '2025-09-10 02:43:34.255946', 'step': 3084, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:43:34.308338', 'step': 3084, 'epoch': 2} +{'type': 'loss', 'content': 0.014333308674395084, 'timestamp': '2025-09-10 02:43:34.310443', 'step': 3085, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-09-10 02:43:34.378589', 'step': 3085, 'epoch': 2} +{'type': 'loss', 'content': 0.0008122967556118965, 'timestamp': '2025-09-10 02:43:34.391169', 'step': 3086, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:43:34.444024', 'step': 3086, 'epoch': 2} +{'type': 'loss', 'content': 0.008164488710463047, 'timestamp': '2025-09-10 02:43:34.445951', 'step': 3087, 'epoch': 2} +{'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 736], 'batch_size': 8, 'flops': 14691612894976}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10859018244352}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 624], 'batch_size': 8, 'flops': 12455932682112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11497784019456}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8623338031488}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8623338031488}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10859018244352}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 656], 'batch_size': 8, 'flops': 13094698457216}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 560], 'batch_size': 8, 'flops': 11178401131904}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11497784019456}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 1168], 'batch_size': 8, 'flops': 23314950858880}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 640], 'batch_size': 8, 'flops': 12775315569664}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [6, 208], 'batch_size': 8, 'flops': 4151977605760}], 'timestamp': '2025-09-10 02:43:51.268366', 'step': 3087, 'epoch': 2} +{'type': 'pplx', 'content': 19060472.29738327, 'timestamp': '2025-09-10 02:43:51.271269', 'step': 3087, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:43:51.326149', 'step': 3087, 'epoch': 2} +{'type': 'loss', 'content': 0.0016778473509475589, 'timestamp': '2025-09-10 02:43:51.335114', 'step': 3088, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:43:51.391894', 'step': 3088, 'epoch': 2} +{'type': 'loss', 'content': 0.00943154189735651, 'timestamp': '2025-09-10 02:43:51.393896', 'step': 3089, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:43:51.447082', 'step': 3089, 'epoch': 2} +{'type': 'loss', 'content': 0.0010285828029736876, 'timestamp': '2025-09-10 02:43:51.454967', 'step': 3090, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:43:51.508167', 'step': 3090, 'epoch': 2} +{'type': 'loss', 'content': 0.013816392049193382, 'timestamp': '2025-09-10 02:43:51.510348', 'step': 3091, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:43:51.563358', 'step': 3091, 'epoch': 2} +{'type': 'loss', 'content': 0.017100904136896133, 'timestamp': '2025-09-10 02:43:51.569068', 'step': 3092, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:43:51.625468', 'step': 3092, 'epoch': 2} +{'type': 'loss', 'content': 0.035547781735658646, 'timestamp': '2025-09-10 02:43:51.636656', 'step': 3093, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-09-10 02:43:51.703242', 'step': 3093, 'epoch': 2} +{'type': 'loss', 'content': 0.0028612406458705664, 'timestamp': '2025-09-10 02:43:51.715475', 'step': 3094, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:43:51.768669', 'step': 3094, 'epoch': 2} +{'type': 'loss', 'content': 0.0025799472350627184, 'timestamp': '2025-09-10 02:43:51.770680', 'step': 3095, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:43:51.823582', 'step': 3095, 'epoch': 2} +{'type': 'loss', 'content': 0.003510661656036973, 'timestamp': '2025-09-10 02:43:51.829206', 'step': 3096, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:43:51.880893', 'step': 3096, 'epoch': 2} +{'type': 'loss', 'content': 0.002644149586558342, 'timestamp': '2025-09-10 02:43:51.882846', 'step': 3097, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:43:51.935425', 'step': 3097, 'epoch': 2} +{'type': 'loss', 'content': 0.014366241171956062, 'timestamp': '2025-09-10 02:43:51.937587', 'step': 3098, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:43:51.990155', 'step': 3098, 'epoch': 2} +{'type': 'loss', 'content': 0.009584962390363216, 'timestamp': '2025-09-10 02:43:51.992352', 'step': 3099, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:43:52.045106', 'step': 3099, 'epoch': 2} +{'type': 'loss', 'content': 0.0015968400985002518, 'timestamp': '2025-09-10 02:43:52.050702', 'step': 3100, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:43:52.102968', 'step': 3100, 'epoch': 2} +{'type': 'loss', 'content': 0.021843912079930305, 'timestamp': '2025-09-10 02:43:52.105129', 'step': 3101, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:43:52.157611', 'step': 3101, 'epoch': 2} +{'type': 'loss', 'content': 0.001562813064083457, 'timestamp': '2025-09-10 02:43:52.159682', 'step': 3102, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:43:52.214144', 'step': 3102, 'epoch': 2} +{'type': 'loss', 'content': 0.011524799279868603, 'timestamp': '2025-09-10 02:43:52.223953', 'step': 3103, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:43:52.277894', 'step': 3103, 'epoch': 2} +{'type': 'loss', 'content': 0.0034488060045987368, 'timestamp': '2025-09-10 02:43:52.283749', 'step': 3104, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:43:52.336350', 'step': 3104, 'epoch': 2} +{'type': 'loss', 'content': 0.026885902509093285, 'timestamp': '2025-09-10 02:43:52.343105', 'step': 3105, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:43:52.396339', 'step': 3105, 'epoch': 2} +{'type': 'loss', 'content': 0.009301887825131416, 'timestamp': '2025-09-10 02:43:52.398554', 'step': 3106, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:43:52.456646', 'step': 3106, 'epoch': 2} +{'type': 'loss', 'content': 0.0008182553574442863, 'timestamp': '2025-09-10 02:43:52.467083', 'step': 3107, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:43:52.520631', 'step': 3107, 'epoch': 2} +{'type': 'loss', 'content': 0.007265047635883093, 'timestamp': '2025-09-10 02:43:52.526317', 'step': 3108, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:43:52.578635', 'step': 3108, 'epoch': 2} +{'type': 'loss', 'content': 0.005355027038604021, 'timestamp': '2025-09-10 02:43:52.580901', 'step': 3109, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:43:52.637902', 'step': 3109, 'epoch': 2} +{'type': 'loss', 'content': 0.009155571460723877, 'timestamp': '2025-09-10 02:43:52.646178', 'step': 3110, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:43:52.699468', 'step': 3110, 'epoch': 2} +{'type': 'loss', 'content': 0.002473545726388693, 'timestamp': '2025-09-10 02:43:52.707732', 'step': 3111, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:43:52.761110', 'step': 3111, 'epoch': 2} +{'type': 'loss', 'content': 0.0006904462352395058, 'timestamp': '2025-09-10 02:43:52.767917', 'step': 3112, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:43:52.820750', 'step': 3112, 'epoch': 2} +{'type': 'loss', 'content': 0.008953842334449291, 'timestamp': '2025-09-10 02:43:52.822737', 'step': 3113, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:43:52.875172', 'step': 3113, 'epoch': 2} +{'type': 'loss', 'content': 0.006417619530111551, 'timestamp': '2025-09-10 02:43:52.877364', 'step': 3114, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 480], 'flops': 9600058345344.0}, 'timestamp': '2025-09-10 02:43:52.950713', 'step': 3114, 'epoch': 2} +{'type': 'loss', 'content': 0.0418584868311882, 'timestamp': '2025-09-10 02:43:52.964456', 'step': 3115, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:43:53.018738', 'step': 3115, 'epoch': 2} +{'type': 'loss', 'content': 0.030400361865758896, 'timestamp': '2025-09-10 02:43:53.029346', 'step': 3116, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:43:53.081389', 'step': 3116, 'epoch': 2} +{'type': 'loss', 'content': 0.008601049892604351, 'timestamp': '2025-09-10 02:43:53.083440', 'step': 3117, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:43:53.136333', 'step': 3117, 'epoch': 2} +{'type': 'loss', 'content': 0.009945330210030079, 'timestamp': '2025-09-10 02:43:53.138324', 'step': 3118, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:43:53.192728', 'step': 3118, 'epoch': 2} +{'type': 'loss', 'content': 0.018205152824521065, 'timestamp': '2025-09-10 02:43:53.202531', 'step': 3119, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-09-10 02:43:53.264025', 'step': 3119, 'epoch': 2} +{'type': 'loss', 'content': 0.005881735123693943, 'timestamp': '2025-09-10 02:43:53.275918', 'step': 3120, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:43:53.327833', 'step': 3120, 'epoch': 2} +{'type': 'loss', 'content': 0.012508176267147064, 'timestamp': '2025-09-10 02:43:53.329786', 'step': 3121, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:43:53.381846', 'step': 3121, 'epoch': 2} +{'type': 'loss', 'content': 0.01045707706362009, 'timestamp': '2025-09-10 02:43:53.383931', 'step': 3122, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:43:53.436349', 'step': 3122, 'epoch': 2} +{'type': 'loss', 'content': 0.005674861371517181, 'timestamp': '2025-09-10 02:43:53.439166', 'step': 3123, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-09-10 02:43:53.505364', 'step': 3123, 'epoch': 2} +{'type': 'loss', 'content': 0.0021117881406098604, 'timestamp': '2025-09-10 02:43:53.518409', 'step': 3124, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:43:53.570274', 'step': 3124, 'epoch': 2} +{'type': 'loss', 'content': 0.003709127428010106, 'timestamp': '2025-09-10 02:43:53.572301', 'step': 3125, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:43:53.624394', 'step': 3125, 'epoch': 2} +{'type': 'loss', 'content': 0.005707950331270695, 'timestamp': '2025-09-10 02:43:53.626620', 'step': 3126, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:43:53.679805', 'step': 3126, 'epoch': 2} +{'type': 'loss', 'content': 0.02047325111925602, 'timestamp': '2025-09-10 02:43:53.689387', 'step': 3127, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:43:53.742759', 'step': 3127, 'epoch': 2} +{'type': 'loss', 'content': 0.0044165621511638165, 'timestamp': '2025-09-10 02:43:53.748383', 'step': 3128, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:43:53.800726', 'step': 3128, 'epoch': 2} +{'type': 'loss', 'content': 0.007385820150375366, 'timestamp': '2025-09-10 02:43:53.802670', 'step': 3129, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:43:53.856039', 'step': 3129, 'epoch': 2} +{'type': 'loss', 'content': 0.007362124510109425, 'timestamp': '2025-09-10 02:43:53.865656', 'step': 3130, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:43:53.918940', 'step': 3130, 'epoch': 2} +{'type': 'loss', 'content': 0.0003541136684361845, 'timestamp': '2025-09-10 02:43:53.920941', 'step': 3131, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:43:53.973464', 'step': 3131, 'epoch': 2} +{'type': 'loss', 'content': 0.00143281405325979, 'timestamp': '2025-09-10 02:43:53.979345', 'step': 3132, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:43:54.031131', 'step': 3132, 'epoch': 2} +{'type': 'loss', 'content': 0.0071766795590519905, 'timestamp': '2025-09-10 02:43:54.033169', 'step': 3133, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:43:54.085526', 'step': 3133, 'epoch': 2} +{'type': 'loss', 'content': 0.004538625478744507, 'timestamp': '2025-09-10 02:43:54.087804', 'step': 3134, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:43:54.140738', 'step': 3134, 'epoch': 2} +{'type': 'loss', 'content': 0.016915742307901382, 'timestamp': '2025-09-10 02:43:54.149002', 'step': 3135, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:43:54.201621', 'step': 3135, 'epoch': 2} +{'type': 'loss', 'content': 0.01447251532226801, 'timestamp': '2025-09-10 02:43:54.207234', 'step': 3136, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:43:54.259235', 'step': 3136, 'epoch': 2} +{'type': 'loss', 'content': 0.027732182294130325, 'timestamp': '2025-09-10 02:43:54.261568', 'step': 3137, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:43:54.314127', 'step': 3137, 'epoch': 2} +{'type': 'loss', 'content': 0.0018878389382734895, 'timestamp': '2025-09-10 02:43:54.317012', 'step': 3138, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:43:54.378074', 'step': 3138, 'epoch': 2} +{'type': 'loss', 'content': 0.011497820727527142, 'timestamp': '2025-09-10 02:43:54.388849', 'step': 3139, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:43:54.441592', 'step': 3139, 'epoch': 2} +{'type': 'loss', 'content': 0.0032113136257976294, 'timestamp': '2025-09-10 02:43:54.447117', 'step': 3140, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-09-10 02:43:54.513012', 'step': 3140, 'epoch': 2} +{'type': 'loss', 'content': 0.00036818793159909546, 'timestamp': '2025-09-10 02:43:54.526626', 'step': 3141, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:43:54.580467', 'step': 3141, 'epoch': 2} +{'type': 'loss', 'content': 0.0006413311348296702, 'timestamp': '2025-09-10 02:43:54.590089', 'step': 3142, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-09-10 02:43:54.656624', 'step': 3142, 'epoch': 2} +{'type': 'loss', 'content': 0.032100610435009, 'timestamp': '2025-09-10 02:43:54.668776', 'step': 3143, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:43:54.721298', 'step': 3143, 'epoch': 2} +{'type': 'loss', 'content': 0.009853010065853596, 'timestamp': '2025-09-10 02:43:54.726898', 'step': 3144, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-09-10 02:43:54.786069', 'step': 3144, 'epoch': 2} +{'type': 'loss', 'content': 0.005006527062505484, 'timestamp': '2025-09-10 02:43:54.797833', 'step': 3145, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:43:54.850532', 'step': 3145, 'epoch': 2} +{'type': 'loss', 'content': 0.0006793158245272934, 'timestamp': '2025-09-10 02:43:54.852494', 'step': 3146, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:43:54.904627', 'step': 3146, 'epoch': 2} +{'type': 'loss', 'content': 0.006459634751081467, 'timestamp': '2025-09-10 02:43:54.906595', 'step': 3147, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 528], 'flops': 10560064173120.0}, 'timestamp': '2025-09-10 02:43:54.986880', 'step': 3147, 'epoch': 2} +{'type': 'loss', 'content': 0.011387155391275883, 'timestamp': '2025-09-10 02:43:55.002755', 'step': 3148, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:43:55.055781', 'step': 3148, 'epoch': 2} +{'type': 'loss', 'content': 0.008111325092613697, 'timestamp': '2025-09-10 02:43:55.057805', 'step': 3149, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:43:55.110230', 'step': 3149, 'epoch': 2} +{'type': 'loss', 'content': 0.024190669879317284, 'timestamp': '2025-09-10 02:43:55.112563', 'step': 3150, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:43:55.165300', 'step': 3150, 'epoch': 2} +{'type': 'loss', 'content': 0.013140763156116009, 'timestamp': '2025-09-10 02:43:55.167406', 'step': 3151, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:43:55.219737', 'step': 3151, 'epoch': 2} +{'type': 'loss', 'content': 0.00048197299474850297, 'timestamp': '2025-09-10 02:43:55.225441', 'step': 3152, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:43:55.281989', 'step': 3152, 'epoch': 2} +{'type': 'loss', 'content': 0.036931704729795456, 'timestamp': '2025-09-10 02:43:55.293235', 'step': 3153, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:43:55.353910', 'step': 3153, 'epoch': 2} +{'type': 'loss', 'content': 0.005720453802496195, 'timestamp': '2025-09-10 02:43:55.364658', 'step': 3154, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:43:55.417547', 'step': 3154, 'epoch': 2} +{'type': 'loss', 'content': 0.0008417764329351485, 'timestamp': '2025-09-10 02:43:55.419836', 'step': 3155, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:43:55.472590', 'step': 3155, 'epoch': 2} +{'type': 'loss', 'content': 0.0033593338448554277, 'timestamp': '2025-09-10 02:43:55.478370', 'step': 3156, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:43:55.530383', 'step': 3156, 'epoch': 2} +{'type': 'loss', 'content': 0.009389621205627918, 'timestamp': '2025-09-10 02:43:55.532142', 'step': 3157, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:43:55.584168', 'step': 3157, 'epoch': 2} +{'type': 'loss', 'content': 0.007109578233212233, 'timestamp': '2025-09-10 02:43:55.586178', 'step': 3158, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:43:55.640301', 'step': 3158, 'epoch': 2} +{'type': 'loss', 'content': 0.0009504547342658043, 'timestamp': '2025-09-10 02:43:55.650098', 'step': 3159, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:43:55.702442', 'step': 3159, 'epoch': 2} +{'type': 'loss', 'content': 0.000521921378094703, 'timestamp': '2025-09-10 02:43:55.707964', 'step': 3160, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:43:55.766774', 'step': 3160, 'epoch': 2} +{'type': 'loss', 'content': 0.010958666913211346, 'timestamp': '2025-09-10 02:43:55.778348', 'step': 3161, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:43:55.831771', 'step': 3161, 'epoch': 2} +{'type': 'loss', 'content': 0.021725790575146675, 'timestamp': '2025-09-10 02:43:55.834031', 'step': 3162, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:43:55.886775', 'step': 3162, 'epoch': 2} +{'type': 'loss', 'content': 0.003122693160548806, 'timestamp': '2025-09-10 02:43:55.895002', 'step': 3163, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:43:55.948007', 'step': 3163, 'epoch': 2} +{'type': 'loss', 'content': 0.003681901143863797, 'timestamp': '2025-09-10 02:43:55.953550', 'step': 3164, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:43:56.005598', 'step': 3164, 'epoch': 2} +{'type': 'loss', 'content': 0.0014724161010235548, 'timestamp': '2025-09-10 02:43:56.007850', 'step': 3165, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:43:56.060591', 'step': 3165, 'epoch': 2} +{'type': 'loss', 'content': 0.00038874123129062355, 'timestamp': '2025-09-10 02:43:56.062884', 'step': 3166, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:43:56.115488', 'step': 3166, 'epoch': 2} +{'type': 'loss', 'content': 0.011899313889443874, 'timestamp': '2025-09-10 02:43:56.117843', 'step': 3167, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:43:56.171108', 'step': 3167, 'epoch': 2} +{'type': 'loss', 'content': 0.0036208501551300287, 'timestamp': '2025-09-10 02:43:56.176671', 'step': 3168, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:43:56.235418', 'step': 3168, 'epoch': 2} +{'type': 'loss', 'content': 0.0023371472489088774, 'timestamp': '2025-09-10 02:43:56.246940', 'step': 3169, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:43:56.299613', 'step': 3169, 'epoch': 2} +{'type': 'loss', 'content': 0.010267443023622036, 'timestamp': '2025-09-10 02:43:56.301647', 'step': 3170, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:43:56.355144', 'step': 3170, 'epoch': 2} +{'type': 'loss', 'content': 0.0011791570577770472, 'timestamp': '2025-09-10 02:43:56.357386', 'step': 3171, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:43:56.410709', 'step': 3171, 'epoch': 2} +{'type': 'loss', 'content': 0.0022798667196184397, 'timestamp': '2025-09-10 02:43:56.417652', 'step': 3172, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:43:56.470602', 'step': 3172, 'epoch': 2} +{'type': 'loss', 'content': 0.0019373170798644423, 'timestamp': '2025-09-10 02:43:56.477095', 'step': 3173, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:43:56.537779', 'step': 3173, 'epoch': 2} +{'type': 'loss', 'content': 0.004268851596862078, 'timestamp': '2025-09-10 02:43:56.548435', 'step': 3174, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:43:56.606077', 'step': 3174, 'epoch': 2} +{'type': 'loss', 'content': 0.01142526138573885, 'timestamp': '2025-09-10 02:43:56.616494', 'step': 3175, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:43:56.669824', 'step': 3175, 'epoch': 2} +{'type': 'loss', 'content': 0.02694218046963215, 'timestamp': '2025-09-10 02:43:56.676994', 'step': 3176, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:43:56.728985', 'step': 3176, 'epoch': 2} +{'type': 'loss', 'content': 0.016898388043045998, 'timestamp': '2025-09-10 02:43:56.730973', 'step': 3177, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:43:56.784069', 'step': 3177, 'epoch': 2} +{'type': 'loss', 'content': 0.0030371483881026506, 'timestamp': '2025-09-10 02:43:56.786105', 'step': 3178, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:43:56.838255', 'step': 3178, 'epoch': 2} +{'type': 'loss', 'content': 0.04292638227343559, 'timestamp': '2025-09-10 02:43:56.841522', 'step': 3179, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:43:56.893485', 'step': 3179, 'epoch': 2} +{'type': 'loss', 'content': 0.00027255882741883397, 'timestamp': '2025-09-10 02:43:56.899193', 'step': 3180, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:43:56.951834', 'step': 3180, 'epoch': 2} +{'type': 'loss', 'content': 0.0032448743004351854, 'timestamp': '2025-09-10 02:43:56.954023', 'step': 3181, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:43:57.006707', 'step': 3181, 'epoch': 2} +{'type': 'loss', 'content': 0.0014674147823825479, 'timestamp': '2025-09-10 02:43:57.013425', 'step': 3182, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:43:57.066772', 'step': 3182, 'epoch': 2} +{'type': 'loss', 'content': 0.012098170816898346, 'timestamp': '2025-09-10 02:43:57.068850', 'step': 3183, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:43:57.121711', 'step': 3183, 'epoch': 2} +{'type': 'loss', 'content': 0.041172586381435394, 'timestamp': '2025-09-10 02:43:57.127373', 'step': 3184, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:43:57.179558', 'step': 3184, 'epoch': 2} +{'type': 'loss', 'content': 0.0026879762299358845, 'timestamp': '2025-09-10 02:43:57.181523', 'step': 3185, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:43:57.234828', 'step': 3185, 'epoch': 2} +{'type': 'loss', 'content': 0.0037374666426330805, 'timestamp': '2025-09-10 02:43:57.244480', 'step': 3186, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:43:57.297360', 'step': 3186, 'epoch': 2} +{'type': 'loss', 'content': 0.0074369641952216625, 'timestamp': '2025-09-10 02:43:57.300450', 'step': 3187, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:43:57.353206', 'step': 3187, 'epoch': 2} +{'type': 'loss', 'content': 0.009308290667831898, 'timestamp': '2025-09-10 02:43:57.358849', 'step': 3188, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:43:57.411780', 'step': 3188, 'epoch': 2} +{'type': 'loss', 'content': 0.00768723338842392, 'timestamp': '2025-09-10 02:43:57.422293', 'step': 3189, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:43:57.474997', 'step': 3189, 'epoch': 2} +{'type': 'loss', 'content': 0.004245481453835964, 'timestamp': '2025-09-10 02:43:57.481406', 'step': 3190, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:43:57.534304', 'step': 3190, 'epoch': 2} +{'type': 'loss', 'content': 0.03975455090403557, 'timestamp': '2025-09-10 02:43:57.536584', 'step': 3191, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:43:57.589297', 'step': 3191, 'epoch': 2} +{'type': 'loss', 'content': 0.015883363783359528, 'timestamp': '2025-09-10 02:43:57.594837', 'step': 3192, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:43:57.647884', 'step': 3192, 'epoch': 2} +{'type': 'loss', 'content': 0.007465971168130636, 'timestamp': '2025-09-10 02:43:57.649886', 'step': 3193, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:43:57.703022', 'step': 3193, 'epoch': 2} +{'type': 'loss', 'content': 0.005559634882956743, 'timestamp': '2025-09-10 02:43:57.712676', 'step': 3194, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-09-10 02:43:57.780891', 'step': 3194, 'epoch': 2} +{'type': 'loss', 'content': 0.010664467699825764, 'timestamp': '2025-09-10 02:43:57.793422', 'step': 3195, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:43:57.846546', 'step': 3195, 'epoch': 2} +{'type': 'loss', 'content': 0.0013000806793570518, 'timestamp': '2025-09-10 02:43:57.852306', 'step': 3196, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:43:57.903943', 'step': 3196, 'epoch': 2} +{'type': 'loss', 'content': 0.021773725748062134, 'timestamp': '2025-09-10 02:43:57.906054', 'step': 3197, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 480], 'flops': 9600058345344.0}, 'timestamp': '2025-09-10 02:43:57.979490', 'step': 3197, 'epoch': 2} +{'type': 'loss', 'content': 0.020083343610167503, 'timestamp': '2025-09-10 02:43:57.993237', 'step': 3198, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:43:58.046155', 'step': 3198, 'epoch': 2} +{'type': 'loss', 'content': 0.0013784419279545546, 'timestamp': '2025-09-10 02:43:58.048137', 'step': 3199, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:43:58.100654', 'step': 3199, 'epoch': 2} +{'type': 'loss', 'content': 0.006651018746197224, 'timestamp': '2025-09-10 02:43:58.106304', 'step': 3200, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:43:58.158887', 'step': 3200, 'epoch': 2} +{'type': 'loss', 'content': 0.001875710440799594, 'timestamp': '2025-09-10 02:43:58.161523', 'step': 3201, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-09-10 02:43:58.229967', 'step': 3201, 'epoch': 2} +{'type': 'loss', 'content': 0.010077283717691898, 'timestamp': '2025-09-10 02:43:58.242641', 'step': 3202, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:43:58.303178', 'step': 3202, 'epoch': 2} +{'type': 'loss', 'content': 0.0010139418300241232, 'timestamp': '2025-09-10 02:43:58.313971', 'step': 3203, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-09-10 02:43:58.375455', 'step': 3203, 'epoch': 2} +{'type': 'loss', 'content': 0.00368149159476161, 'timestamp': '2025-09-10 02:43:58.387368', 'step': 3204, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:43:58.439768', 'step': 3204, 'epoch': 2} +{'type': 'loss', 'content': 0.009633967652916908, 'timestamp': '2025-09-10 02:43:58.441682', 'step': 3205, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:43:58.494310', 'step': 3205, 'epoch': 2} +{'type': 'loss', 'content': 0.01818724349141121, 'timestamp': '2025-09-10 02:43:58.496444', 'step': 3206, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:43:58.549371', 'step': 3206, 'epoch': 2} +{'type': 'loss', 'content': 0.0032443441450595856, 'timestamp': '2025-09-10 02:43:58.557237', 'step': 3207, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:43:58.609966', 'step': 3207, 'epoch': 2} +{'type': 'loss', 'content': 0.0024807697627693415, 'timestamp': '2025-09-10 02:43:58.615695', 'step': 3208, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:43:58.668226', 'step': 3208, 'epoch': 2} +{'type': 'loss', 'content': 0.0056392294354736805, 'timestamp': '2025-09-10 02:43:58.674329', 'step': 3209, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 480], 'flops': 9600058345344.0}, 'timestamp': '2025-09-10 02:43:58.747634', 'step': 3209, 'epoch': 2} +{'type': 'loss', 'content': 0.009499141946434975, 'timestamp': '2025-09-10 02:43:58.761304', 'step': 3210, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-09-10 02:43:58.829215', 'step': 3210, 'epoch': 2} +{'type': 'loss', 'content': 0.003111324505880475, 'timestamp': '2025-09-10 02:43:58.841851', 'step': 3211, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:43:58.895081', 'step': 3211, 'epoch': 2} +{'type': 'loss', 'content': 0.002740873722359538, 'timestamp': '2025-09-10 02:43:58.900651', 'step': 3212, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:43:58.952916', 'step': 3212, 'epoch': 2} +{'type': 'loss', 'content': 0.0032866480760276318, 'timestamp': '2025-09-10 02:43:58.959173', 'step': 3213, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:43:59.012646', 'step': 3213, 'epoch': 2} +{'type': 'loss', 'content': 0.014923141337931156, 'timestamp': '2025-09-10 02:43:59.022257', 'step': 3214, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:43:59.075168', 'step': 3214, 'epoch': 2} +{'type': 'loss', 'content': 0.0027747994754463434, 'timestamp': '2025-09-10 02:43:59.077241', 'step': 3215, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:43:59.130704', 'step': 3215, 'epoch': 2} +{'type': 'loss', 'content': 0.0035309165250509977, 'timestamp': '2025-09-10 02:43:59.136596', 'step': 3216, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-09-10 02:43:59.196357', 'step': 3216, 'epoch': 2} +{'type': 'loss', 'content': 0.007304273080080748, 'timestamp': '2025-09-10 02:43:59.208427', 'step': 3217, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:43:59.266523', 'step': 3217, 'epoch': 2} +{'type': 'loss', 'content': 0.0035353435669094324, 'timestamp': '2025-09-10 02:43:59.277015', 'step': 3218, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:43:59.329835', 'step': 3218, 'epoch': 2} +{'type': 'loss', 'content': 0.0021327794529497623, 'timestamp': '2025-09-10 02:43:59.331974', 'step': 3219, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:43:59.384552', 'step': 3219, 'epoch': 2} +{'type': 'loss', 'content': 0.0038116122595965862, 'timestamp': '2025-09-10 02:43:59.390456', 'step': 3220, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:43:59.442354', 'step': 3220, 'epoch': 2} +{'type': 'loss', 'content': 0.004772346466779709, 'timestamp': '2025-09-10 02:43:59.444482', 'step': 3221, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:43:59.497266', 'step': 3221, 'epoch': 2} +{'type': 'loss', 'content': 0.002986540552228689, 'timestamp': '2025-09-10 02:43:59.505590', 'step': 3222, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:43:59.559688', 'step': 3222, 'epoch': 2} +{'type': 'loss', 'content': 0.02876511588692665, 'timestamp': '2025-09-10 02:43:59.561770', 'step': 3223, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:43:59.614381', 'step': 3223, 'epoch': 2} +{'type': 'loss', 'content': 0.02211517095565796, 'timestamp': '2025-09-10 02:43:59.620137', 'step': 3224, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:43:59.672538', 'step': 3224, 'epoch': 2} +{'type': 'loss', 'content': 0.003185128327459097, 'timestamp': '2025-09-10 02:43:59.674703', 'step': 3225, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:43:59.726981', 'step': 3225, 'epoch': 2} +{'type': 'loss', 'content': 0.002619502367451787, 'timestamp': '2025-09-10 02:43:59.729207', 'step': 3226, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:43:59.781562', 'step': 3226, 'epoch': 2} +{'type': 'loss', 'content': 0.007608884014189243, 'timestamp': '2025-09-10 02:43:59.783786', 'step': 3227, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:43:59.836506', 'step': 3227, 'epoch': 2} +{'type': 'loss', 'content': 0.01058508176356554, 'timestamp': '2025-09-10 02:43:59.842050', 'step': 3228, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:43:59.893836', 'step': 3228, 'epoch': 2} +{'type': 'loss', 'content': 0.0037439449224621058, 'timestamp': '2025-09-10 02:43:59.895841', 'step': 3229, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:43:59.948269', 'step': 3229, 'epoch': 2} +{'type': 'loss', 'content': 0.0023789783008396626, 'timestamp': '2025-09-10 02:43:59.950331', 'step': 3230, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:44:00.007327', 'step': 3230, 'epoch': 2} +{'type': 'loss', 'content': 0.0311024971306324, 'timestamp': '2025-09-10 02:44:00.017777', 'step': 3231, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:44:00.071248', 'step': 3231, 'epoch': 2} +{'type': 'loss', 'content': 0.0029404382221400738, 'timestamp': '2025-09-10 02:44:00.076898', 'step': 3232, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:44:00.128968', 'step': 3232, 'epoch': 2} +{'type': 'loss', 'content': 0.003119255183264613, 'timestamp': '2025-09-10 02:44:00.130922', 'step': 3233, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:44:00.183286', 'step': 3233, 'epoch': 2} +{'type': 'loss', 'content': 0.0022972356528043747, 'timestamp': '2025-09-10 02:44:00.185476', 'step': 3234, 'epoch': 2} +{'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 736], 'batch_size': 8, 'flops': 14691612894976}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10859018244352}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 624], 'batch_size': 8, 'flops': 12455932682112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11497784019456}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8623338031488}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8623338031488}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10859018244352}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 656], 'batch_size': 8, 'flops': 13094698457216}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 560], 'batch_size': 8, 'flops': 11178401131904}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11497784019456}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 1168], 'batch_size': 8, 'flops': 23314950858880}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 640], 'batch_size': 8, 'flops': 12775315569664}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [6, 208], 'batch_size': 8, 'flops': 4151977605760}], 'timestamp': '2025-09-10 02:44:16.970765', 'step': 3234, 'epoch': 2} +{'type': 'pplx', 'content': 21072886.619915567, 'timestamp': '2025-09-10 02:44:16.973571', 'step': 3234, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:44:17.028228', 'step': 3234, 'epoch': 2} +{'type': 'loss', 'content': 0.005306145641952753, 'timestamp': '2025-09-10 02:44:17.030566', 'step': 3235, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:44:17.084533', 'step': 3235, 'epoch': 2} +{'type': 'loss', 'content': 0.007551746908575296, 'timestamp': '2025-09-10 02:44:17.091006', 'step': 3236, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:44:17.148718', 'step': 3236, 'epoch': 2} +{'type': 'loss', 'content': 0.0039247856475412846, 'timestamp': '2025-09-10 02:44:17.153810', 'step': 3237, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:44:17.213675', 'step': 3237, 'epoch': 2} +{'type': 'loss', 'content': 0.018452981486916542, 'timestamp': '2025-09-10 02:44:17.223474', 'step': 3238, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:44:17.276990', 'step': 3238, 'epoch': 2} +{'type': 'loss', 'content': 0.010647733695805073, 'timestamp': '2025-09-10 02:44:17.285212', 'step': 3239, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:44:17.343826', 'step': 3239, 'epoch': 2} +{'type': 'loss', 'content': 0.0016128303250297904, 'timestamp': '2025-09-10 02:44:17.350110', 'step': 3240, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:44:17.404554', 'step': 3240, 'epoch': 2} +{'type': 'loss', 'content': 0.0033778951037675142, 'timestamp': '2025-09-10 02:44:17.406570', 'step': 3241, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:44:17.460658', 'step': 3241, 'epoch': 2} +{'type': 'loss', 'content': 0.037463899701833725, 'timestamp': '2025-09-10 02:44:17.466552', 'step': 3242, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:44:17.520706', 'step': 3242, 'epoch': 2} +{'type': 'loss', 'content': 0.008468790911138058, 'timestamp': '2025-09-10 02:44:17.523119', 'step': 3243, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 784], 'flops': 15680095254592.0}, 'timestamp': '2025-09-10 02:44:17.636994', 'step': 3243, 'epoch': 2} +{'type': 'loss', 'content': 0.017466222867369652, 'timestamp': '2025-09-10 02:44:17.659922', 'step': 3244, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:44:17.714606', 'step': 3244, 'epoch': 2} +{'type': 'loss', 'content': 0.002223787596449256, 'timestamp': '2025-09-10 02:44:17.724910', 'step': 3245, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:44:17.778190', 'step': 3245, 'epoch': 2} +{'type': 'loss', 'content': 0.012982979416847229, 'timestamp': '2025-09-10 02:44:17.780392', 'step': 3246, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:44:17.834243', 'step': 3246, 'epoch': 2} +{'type': 'loss', 'content': 0.006452381145209074, 'timestamp': '2025-09-10 02:44:17.841932', 'step': 3247, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:44:17.896423', 'step': 3247, 'epoch': 2} +{'type': 'loss', 'content': 0.009165070950984955, 'timestamp': '2025-09-10 02:44:17.902794', 'step': 3248, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-09-10 02:44:17.970803', 'step': 3248, 'epoch': 2} +{'type': 'loss', 'content': 0.0018486840417608619, 'timestamp': '2025-09-10 02:44:17.984558', 'step': 3249, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:44:18.038415', 'step': 3249, 'epoch': 2} +{'type': 'loss', 'content': 0.02603820338845253, 'timestamp': '2025-09-10 02:44:18.041590', 'step': 3250, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:44:18.094948', 'step': 3250, 'epoch': 2} +{'type': 'loss', 'content': 0.002727191662415862, 'timestamp': '2025-09-10 02:44:18.097222', 'step': 3251, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:44:18.150020', 'step': 3251, 'epoch': 2} +{'type': 'loss', 'content': 0.02278628572821617, 'timestamp': '2025-09-10 02:44:18.156088', 'step': 3252, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:44:18.209228', 'step': 3252, 'epoch': 2} +{'type': 'loss', 'content': 0.002900347812101245, 'timestamp': '2025-09-10 02:44:18.211614', 'step': 3253, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:44:18.264896', 'step': 3253, 'epoch': 2} +{'type': 'loss', 'content': 0.00995334517210722, 'timestamp': '2025-09-10 02:44:18.268172', 'step': 3254, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:44:18.321906', 'step': 3254, 'epoch': 2} +{'type': 'loss', 'content': 0.0008439576486125588, 'timestamp': '2025-09-10 02:44:18.324305', 'step': 3255, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:44:18.377885', 'step': 3255, 'epoch': 2} +{'type': 'loss', 'content': 0.006541337352246046, 'timestamp': '2025-09-10 02:44:18.384137', 'step': 3256, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:44:18.437274', 'step': 3256, 'epoch': 2} +{'type': 'loss', 'content': 0.02270149253308773, 'timestamp': '2025-09-10 02:44:18.439652', 'step': 3257, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:44:18.493137', 'step': 3257, 'epoch': 2} +{'type': 'loss', 'content': 0.0006551208789460361, 'timestamp': '2025-09-10 02:44:18.495442', 'step': 3258, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:44:18.548847', 'step': 3258, 'epoch': 2} +{'type': 'loss', 'content': 0.0011171090882271528, 'timestamp': '2025-09-10 02:44:18.551627', 'step': 3259, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:44:18.605121', 'step': 3259, 'epoch': 2} +{'type': 'loss', 'content': 0.019644513726234436, 'timestamp': '2025-09-10 02:44:18.611154', 'step': 3260, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-09-10 02:44:18.676209', 'step': 3260, 'epoch': 2} +{'type': 'loss', 'content': 0.0007226437446661294, 'timestamp': '2025-09-10 02:44:18.689397', 'step': 3261, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:44:18.744002', 'step': 3261, 'epoch': 2} +{'type': 'loss', 'content': 0.001632618485018611, 'timestamp': '2025-09-10 02:44:18.750154', 'step': 3262, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:44:18.803676', 'step': 3262, 'epoch': 2} +{'type': 'loss', 'content': 0.021618137136101723, 'timestamp': '2025-09-10 02:44:18.805851', 'step': 3263, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 496], 'flops': 9920060287936.0}, 'timestamp': '2025-09-10 02:44:18.880205', 'step': 3263, 'epoch': 2} +{'type': 'loss', 'content': 0.005943900439888239, 'timestamp': '2025-09-10 02:44:18.894895', 'step': 3264, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:44:18.955298', 'step': 3264, 'epoch': 2} +{'type': 'loss', 'content': 0.0017461031675338745, 'timestamp': '2025-09-10 02:44:18.966865', 'step': 3265, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:44:19.022686', 'step': 3265, 'epoch': 2} +{'type': 'loss', 'content': 0.013536013662815094, 'timestamp': '2025-09-10 02:44:19.025313', 'step': 3266, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:44:19.079912', 'step': 3266, 'epoch': 2} +{'type': 'loss', 'content': 0.0010626944713294506, 'timestamp': '2025-09-10 02:44:19.082143', 'step': 3267, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:44:19.135395', 'step': 3267, 'epoch': 2} +{'type': 'loss', 'content': 0.0007986924611032009, 'timestamp': '2025-09-10 02:44:19.141258', 'step': 3268, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:44:19.194374', 'step': 3268, 'epoch': 2} +{'type': 'loss', 'content': 0.010827191174030304, 'timestamp': '2025-09-10 02:44:19.196666', 'step': 3269, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:44:19.249972', 'step': 3269, 'epoch': 2} +{'type': 'loss', 'content': 0.007971197366714478, 'timestamp': '2025-09-10 02:44:19.252366', 'step': 3270, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:44:19.306000', 'step': 3270, 'epoch': 2} +{'type': 'loss', 'content': 0.00854082778096199, 'timestamp': '2025-09-10 02:44:19.308038', 'step': 3271, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:44:19.360580', 'step': 3271, 'epoch': 2} +{'type': 'loss', 'content': 0.005443661939352751, 'timestamp': '2025-09-10 02:44:19.366382', 'step': 3272, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:44:19.419358', 'step': 3272, 'epoch': 2} +{'type': 'loss', 'content': 0.0042872135527431965, 'timestamp': '2025-09-10 02:44:19.421324', 'step': 3273, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:44:19.474137', 'step': 3273, 'epoch': 2} +{'type': 'loss', 'content': 0.010055058635771275, 'timestamp': '2025-09-10 02:44:19.476328', 'step': 3274, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:44:19.528568', 'step': 3274, 'epoch': 2} +{'type': 'loss', 'content': 0.0064366958104074, 'timestamp': '2025-09-10 02:44:19.531591', 'step': 3275, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:44:19.584395', 'step': 3275, 'epoch': 2} +{'type': 'loss', 'content': 0.0013839764287695289, 'timestamp': '2025-09-10 02:44:19.590186', 'step': 3276, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:44:19.643021', 'step': 3276, 'epoch': 2} +{'type': 'loss', 'content': 0.0011878689983859658, 'timestamp': '2025-09-10 02:44:19.645032', 'step': 3277, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:44:19.697720', 'step': 3277, 'epoch': 2} +{'type': 'loss', 'content': 0.0009049239451996982, 'timestamp': '2025-09-10 02:44:19.699856', 'step': 3278, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:44:19.754805', 'step': 3278, 'epoch': 2} +{'type': 'loss', 'content': 0.002064666012302041, 'timestamp': '2025-09-10 02:44:19.764598', 'step': 3279, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:44:19.818941', 'step': 3279, 'epoch': 2} +{'type': 'loss', 'content': 0.002905084053054452, 'timestamp': '2025-09-10 02:44:19.827307', 'step': 3280, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:44:19.879565', 'step': 3280, 'epoch': 2} +{'type': 'loss', 'content': 0.002872213488444686, 'timestamp': '2025-09-10 02:44:19.881945', 'step': 3281, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:44:19.934949', 'step': 3281, 'epoch': 2} +{'type': 'loss', 'content': 0.005986655596643686, 'timestamp': '2025-09-10 02:44:19.941484', 'step': 3282, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-09-10 02:44:20.003286', 'step': 3282, 'epoch': 2} +{'type': 'loss', 'content': 0.00405543390661478, 'timestamp': '2025-09-10 02:44:20.014239', 'step': 3283, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:44:20.067188', 'step': 3283, 'epoch': 2} +{'type': 'loss', 'content': 0.007533874828368425, 'timestamp': '2025-09-10 02:44:20.072873', 'step': 3284, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:44:20.126007', 'step': 3284, 'epoch': 2} +{'type': 'loss', 'content': 0.004359562881290913, 'timestamp': '2025-09-10 02:44:20.128671', 'step': 3285, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:44:20.181505', 'step': 3285, 'epoch': 2} +{'type': 'loss', 'content': 0.004524012096226215, 'timestamp': '2025-09-10 02:44:20.188275', 'step': 3286, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:44:20.241398', 'step': 3286, 'epoch': 2} +{'type': 'loss', 'content': 0.02158978395164013, 'timestamp': '2025-09-10 02:44:20.243702', 'step': 3287, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:44:20.305155', 'step': 3287, 'epoch': 2} +{'type': 'loss', 'content': 0.010425695218145847, 'timestamp': '2025-09-10 02:44:20.310763', 'step': 3288, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:44:20.363305', 'step': 3288, 'epoch': 2} +{'type': 'loss', 'content': 0.006829250603914261, 'timestamp': '2025-09-10 02:44:20.371489', 'step': 3289, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:44:20.429336', 'step': 3289, 'epoch': 2} +{'type': 'loss', 'content': 0.00593261793255806, 'timestamp': '2025-09-10 02:44:20.431473', 'step': 3290, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:44:20.484946', 'step': 3290, 'epoch': 2} +{'type': 'loss', 'content': 0.007732923608273268, 'timestamp': '2025-09-10 02:44:20.494520', 'step': 3291, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:44:20.547394', 'step': 3291, 'epoch': 2} +{'type': 'loss', 'content': 0.028162673115730286, 'timestamp': '2025-09-10 02:44:20.553283', 'step': 3292, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:44:20.606411', 'step': 3292, 'epoch': 2} +{'type': 'loss', 'content': 0.004819825291633606, 'timestamp': '2025-09-10 02:44:20.620002', 'step': 3293, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:44:20.676430', 'step': 3293, 'epoch': 2} +{'type': 'loss', 'content': 0.016504162922501564, 'timestamp': '2025-09-10 02:44:20.684386', 'step': 3294, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:44:20.740783', 'step': 3294, 'epoch': 2} +{'type': 'loss', 'content': 0.0006112895789556205, 'timestamp': '2025-09-10 02:44:20.743030', 'step': 3295, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:44:20.796494', 'step': 3295, 'epoch': 2} +{'type': 'loss', 'content': 0.0019578339997678995, 'timestamp': '2025-09-10 02:44:20.807642', 'step': 3296, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:44:20.877319', 'step': 3296, 'epoch': 2} +{'type': 'loss', 'content': 0.018936438485980034, 'timestamp': '2025-09-10 02:44:20.880668', 'step': 3297, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-09-10 02:44:20.950824', 'step': 3297, 'epoch': 2} +{'type': 'loss', 'content': 0.0016984677640721202, 'timestamp': '2025-09-10 02:44:20.963399', 'step': 3298, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:44:21.017966', 'step': 3298, 'epoch': 2} +{'type': 'loss', 'content': 0.003191543510183692, 'timestamp': '2025-09-10 02:44:21.020366', 'step': 3299, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:44:21.073349', 'step': 3299, 'epoch': 2} +{'type': 'loss', 'content': 0.005853749345988035, 'timestamp': '2025-09-10 02:44:21.080173', 'step': 3300, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:44:21.132549', 'step': 3300, 'epoch': 2} +{'type': 'loss', 'content': 0.004540544003248215, 'timestamp': '2025-09-10 02:44:21.134846', 'step': 3301, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:44:21.188075', 'step': 3301, 'epoch': 2} +{'type': 'loss', 'content': 0.002611069008708, 'timestamp': '2025-09-10 02:44:21.190311', 'step': 3302, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:44:21.243776', 'step': 3302, 'epoch': 2} +{'type': 'loss', 'content': 0.01286126859486103, 'timestamp': '2025-09-10 02:44:21.246079', 'step': 3303, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:44:21.299389', 'step': 3303, 'epoch': 2} +{'type': 'loss', 'content': 0.012683229520916939, 'timestamp': '2025-09-10 02:44:21.305576', 'step': 3304, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:44:21.359326', 'step': 3304, 'epoch': 2} +{'type': 'loss', 'content': 0.0009508299408480525, 'timestamp': '2025-09-10 02:44:21.361420', 'step': 3305, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:44:21.414975', 'step': 3305, 'epoch': 2} +{'type': 'loss', 'content': 0.01446224283427, 'timestamp': '2025-09-10 02:44:21.417543', 'step': 3306, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:44:21.470712', 'step': 3306, 'epoch': 2} +{'type': 'loss', 'content': 0.017067965120077133, 'timestamp': '2025-09-10 02:44:21.472914', 'step': 3307, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:44:21.526425', 'step': 3307, 'epoch': 2} +{'type': 'loss', 'content': 0.023929893970489502, 'timestamp': '2025-09-10 02:44:21.532768', 'step': 3308, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:44:21.585447', 'step': 3308, 'epoch': 2} +{'type': 'loss', 'content': 0.002198033267632127, 'timestamp': '2025-09-10 02:44:21.593525', 'step': 3309, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:44:21.647026', 'step': 3309, 'epoch': 2} +{'type': 'loss', 'content': 0.02316807396709919, 'timestamp': '2025-09-10 02:44:21.649039', 'step': 3310, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:44:21.702430', 'step': 3310, 'epoch': 2} +{'type': 'loss', 'content': 0.0022349718492478132, 'timestamp': '2025-09-10 02:44:21.708849', 'step': 3311, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:44:21.766916', 'step': 3311, 'epoch': 2} +{'type': 'loss', 'content': 0.02320941723883152, 'timestamp': '2025-09-10 02:44:21.778144', 'step': 3312, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:44:21.831235', 'step': 3312, 'epoch': 2} +{'type': 'loss', 'content': 0.0041513568721711636, 'timestamp': '2025-09-10 02:44:21.837629', 'step': 3313, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:44:21.890631', 'step': 3313, 'epoch': 2} +{'type': 'loss', 'content': 0.030766088515520096, 'timestamp': '2025-09-10 02:44:21.893775', 'step': 3314, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:44:21.946848', 'step': 3314, 'epoch': 2} +{'type': 'loss', 'content': 0.012035499326884747, 'timestamp': '2025-09-10 02:44:21.949148', 'step': 3315, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:44:22.002046', 'step': 3315, 'epoch': 2} +{'type': 'loss', 'content': 0.02300579473376274, 'timestamp': '2025-09-10 02:44:22.008790', 'step': 3316, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:44:22.061436', 'step': 3316, 'epoch': 2} +{'type': 'loss', 'content': 0.000930184789467603, 'timestamp': '2025-09-10 02:44:22.064700', 'step': 3317, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:44:22.118316', 'step': 3317, 'epoch': 2} +{'type': 'loss', 'content': 0.021347789093852043, 'timestamp': '2025-09-10 02:44:22.121455', 'step': 3318, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-09-10 02:44:22.182919', 'step': 3318, 'epoch': 2} +{'type': 'loss', 'content': 0.0012315197382122278, 'timestamp': '2025-09-10 02:44:22.194018', 'step': 3319, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:44:22.248014', 'step': 3319, 'epoch': 2} +{'type': 'loss', 'content': 0.06900060921907425, 'timestamp': '2025-09-10 02:44:22.258452', 'step': 3320, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-09-10 02:44:22.318054', 'step': 3320, 'epoch': 2} +{'type': 'loss', 'content': 0.0010998686775565147, 'timestamp': '2025-09-10 02:44:22.329883', 'step': 3321, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:44:22.383200', 'step': 3321, 'epoch': 2} +{'type': 'loss', 'content': 0.017296746373176575, 'timestamp': '2025-09-10 02:44:22.385811', 'step': 3322, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:44:22.439196', 'step': 3322, 'epoch': 2} +{'type': 'loss', 'content': 0.01512966025620699, 'timestamp': '2025-09-10 02:44:22.442275', 'step': 3323, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:44:22.495366', 'step': 3323, 'epoch': 2} +{'type': 'loss', 'content': 0.00018649300909601152, 'timestamp': '2025-09-10 02:44:22.501516', 'step': 3324, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:44:22.555205', 'step': 3324, 'epoch': 2} +{'type': 'loss', 'content': 0.0036657515447586775, 'timestamp': '2025-09-10 02:44:22.565663', 'step': 3325, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 576], 'flops': 11520070000896.0}, 'timestamp': '2025-09-10 02:44:22.649918', 'step': 3325, 'epoch': 2} +{'type': 'loss', 'content': 0.00610398082062602, 'timestamp': '2025-09-10 02:44:22.665436', 'step': 3326, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:44:22.720106', 'step': 3326, 'epoch': 2} +{'type': 'loss', 'content': 0.0034286784939467907, 'timestamp': '2025-09-10 02:44:22.725333', 'step': 3327, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:44:22.779051', 'step': 3327, 'epoch': 2} +{'type': 'loss', 'content': 0.004797658883035183, 'timestamp': '2025-09-10 02:44:22.785144', 'step': 3328, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:44:22.837957', 'step': 3328, 'epoch': 2} +{'type': 'loss', 'content': 0.0006010635406710207, 'timestamp': '2025-09-10 02:44:22.840739', 'step': 3329, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:44:22.901319', 'step': 3329, 'epoch': 2} +{'type': 'loss', 'content': 0.001819917350076139, 'timestamp': '2025-09-10 02:44:22.912009', 'step': 3330, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:44:22.965458', 'step': 3330, 'epoch': 2} +{'type': 'loss', 'content': 0.0010529962601140141, 'timestamp': '2025-09-10 02:44:22.967774', 'step': 3331, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:44:23.021557', 'step': 3331, 'epoch': 2} +{'type': 'loss', 'content': 0.020855003967881203, 'timestamp': '2025-09-10 02:44:23.027721', 'step': 3332, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:44:23.080274', 'step': 3332, 'epoch': 2} +{'type': 'loss', 'content': 0.004689569119364023, 'timestamp': '2025-09-10 02:44:23.082239', 'step': 3333, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:44:23.135311', 'step': 3333, 'epoch': 2} +{'type': 'loss', 'content': 0.03814322128891945, 'timestamp': '2025-09-10 02:44:23.138604', 'step': 3334, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:44:23.194658', 'step': 3334, 'epoch': 2} +{'type': 'loss', 'content': 0.008889822289347649, 'timestamp': '2025-09-10 02:44:23.196785', 'step': 3335, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:44:23.250020', 'step': 3335, 'epoch': 2} +{'type': 'loss', 'content': 0.006007408257573843, 'timestamp': '2025-09-10 02:44:23.255985', 'step': 3336, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:44:23.308876', 'step': 3336, 'epoch': 2} +{'type': 'loss', 'content': 0.004595792852342129, 'timestamp': '2025-09-10 02:44:23.315096', 'step': 3337, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:44:23.368313', 'step': 3337, 'epoch': 2} +{'type': 'loss', 'content': 0.0029184441082179546, 'timestamp': '2025-09-10 02:44:23.370430', 'step': 3338, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:44:23.424383', 'step': 3338, 'epoch': 2} +{'type': 'loss', 'content': 0.0044219414703547955, 'timestamp': '2025-09-10 02:44:23.432077', 'step': 3339, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:44:23.485388', 'step': 3339, 'epoch': 2} +{'type': 'loss', 'content': 0.006493097636848688, 'timestamp': '2025-09-10 02:44:23.491137', 'step': 3340, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:44:23.543914', 'step': 3340, 'epoch': 2} +{'type': 'loss', 'content': 0.0017229022923856974, 'timestamp': '2025-09-10 02:44:23.546000', 'step': 3341, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:44:23.598927', 'step': 3341, 'epoch': 2} +{'type': 'loss', 'content': 0.002881512511521578, 'timestamp': '2025-09-10 02:44:23.601145', 'step': 3342, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:44:23.655230', 'step': 3342, 'epoch': 2} +{'type': 'loss', 'content': 0.02432120032608509, 'timestamp': '2025-09-10 02:44:23.657380', 'step': 3343, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:44:23.711987', 'step': 3343, 'epoch': 2} +{'type': 'loss', 'content': 0.0251313503831625, 'timestamp': '2025-09-10 02:44:23.718027', 'step': 3344, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:44:23.774774', 'step': 3344, 'epoch': 2} +{'type': 'loss', 'content': 0.0031528447289019823, 'timestamp': '2025-09-10 02:44:23.785979', 'step': 3345, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:44:23.839499', 'step': 3345, 'epoch': 2} +{'type': 'loss', 'content': 0.0008504916331730783, 'timestamp': '2025-09-10 02:44:23.841778', 'step': 3346, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:44:23.895535', 'step': 3346, 'epoch': 2} +{'type': 'loss', 'content': 0.008890111930668354, 'timestamp': '2025-09-10 02:44:23.905156', 'step': 3347, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:44:23.958635', 'step': 3347, 'epoch': 2} +{'type': 'loss', 'content': 0.02196441777050495, 'timestamp': '2025-09-10 02:44:23.964431', 'step': 3348, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:44:24.017304', 'step': 3348, 'epoch': 2} +{'type': 'loss', 'content': 0.007154666353017092, 'timestamp': '2025-09-10 02:44:24.019258', 'step': 3349, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:44:24.072405', 'step': 3349, 'epoch': 2} +{'type': 'loss', 'content': 0.002116176765412092, 'timestamp': '2025-09-10 02:44:24.074613', 'step': 3350, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:44:24.128259', 'step': 3350, 'epoch': 2} +{'type': 'loss', 'content': 0.07852780818939209, 'timestamp': '2025-09-10 02:44:24.130642', 'step': 3351, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:44:24.183591', 'step': 3351, 'epoch': 2} +{'type': 'loss', 'content': 0.0003637216577772051, 'timestamp': '2025-09-10 02:44:24.189784', 'step': 3352, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:44:24.242877', 'step': 3352, 'epoch': 2} +{'type': 'loss', 'content': 0.008689274080097675, 'timestamp': '2025-09-10 02:44:24.245431', 'step': 3353, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:44:24.299599', 'step': 3353, 'epoch': 2} +{'type': 'loss', 'content': 0.0033759246580302715, 'timestamp': '2025-09-10 02:44:24.309176', 'step': 3354, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:44:24.362620', 'step': 3354, 'epoch': 2} +{'type': 'loss', 'content': 0.0015087587526068091, 'timestamp': '2025-09-10 02:44:24.364885', 'step': 3355, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:44:24.418282', 'step': 3355, 'epoch': 2} +{'type': 'loss', 'content': 0.001986152259632945, 'timestamp': '2025-09-10 02:44:24.424255', 'step': 3356, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:44:24.477341', 'step': 3356, 'epoch': 2} +{'type': 'loss', 'content': 0.012343788519501686, 'timestamp': '2025-09-10 02:44:24.487888', 'step': 3357, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:44:24.541321', 'step': 3357, 'epoch': 2} +{'type': 'loss', 'content': 0.00478591863065958, 'timestamp': '2025-09-10 02:44:24.543595', 'step': 3358, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:44:24.597361', 'step': 3358, 'epoch': 2} +{'type': 'loss', 'content': 0.016360685229301453, 'timestamp': '2025-09-10 02:44:24.606964', 'step': 3359, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:44:24.660154', 'step': 3359, 'epoch': 2} +{'type': 'loss', 'content': 0.043959442526102066, 'timestamp': '2025-09-10 02:44:24.665667', 'step': 3360, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:44:24.718894', 'step': 3360, 'epoch': 2} +{'type': 'loss', 'content': 0.015006103552877903, 'timestamp': '2025-09-10 02:44:24.725066', 'step': 3361, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:44:24.778202', 'step': 3361, 'epoch': 2} +{'type': 'loss', 'content': 0.0010425652144476771, 'timestamp': '2025-09-10 02:44:24.784520', 'step': 3362, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:44:24.838136', 'step': 3362, 'epoch': 2} +{'type': 'loss', 'content': 0.009364688768982887, 'timestamp': '2025-09-10 02:44:24.839923', 'step': 3363, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:44:24.893119', 'step': 3363, 'epoch': 2} +{'type': 'loss', 'content': 0.0003628287522587925, 'timestamp': '2025-09-10 02:44:24.898802', 'step': 3364, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:44:24.951293', 'step': 3364, 'epoch': 2} +{'type': 'loss', 'content': 0.002183773322030902, 'timestamp': '2025-09-10 02:44:24.953614', 'step': 3365, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:44:25.006894', 'step': 3365, 'epoch': 2} +{'type': 'loss', 'content': 0.010487676598131657, 'timestamp': '2025-09-10 02:44:25.009234', 'step': 3366, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:44:25.062595', 'step': 3366, 'epoch': 2} +{'type': 'loss', 'content': 0.013433645479381084, 'timestamp': '2025-09-10 02:44:25.064936', 'step': 3367, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:44:25.118624', 'step': 3367, 'epoch': 2} +{'type': 'loss', 'content': 0.008830679580569267, 'timestamp': '2025-09-10 02:44:25.125553', 'step': 3368, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:44:25.178205', 'step': 3368, 'epoch': 2} +{'type': 'loss', 'content': 0.007802027743309736, 'timestamp': '2025-09-10 02:44:25.180225', 'step': 3369, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:44:25.233707', 'step': 3369, 'epoch': 2} +{'type': 'loss', 'content': 0.0065420083701610565, 'timestamp': '2025-09-10 02:44:25.235527', 'step': 3370, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:44:25.288164', 'step': 3370, 'epoch': 2} +{'type': 'loss', 'content': 0.002406763145700097, 'timestamp': '2025-09-10 02:44:25.289964', 'step': 3371, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:44:25.342920', 'step': 3371, 'epoch': 2} +{'type': 'loss', 'content': 0.0017273176927119493, 'timestamp': '2025-09-10 02:44:25.351828', 'step': 3372, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:44:25.404527', 'step': 3372, 'epoch': 2} +{'type': 'loss', 'content': 0.030210746452212334, 'timestamp': '2025-09-10 02:44:25.407431', 'step': 3373, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:44:25.460205', 'step': 3373, 'epoch': 2} +{'type': 'loss', 'content': 0.01453532837331295, 'timestamp': '2025-09-10 02:44:25.462459', 'step': 3374, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:44:25.516047', 'step': 3374, 'epoch': 2} +{'type': 'loss', 'content': 0.0014118814142420888, 'timestamp': '2025-09-10 02:44:25.523839', 'step': 3375, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-09-10 02:44:25.592105', 'step': 3375, 'epoch': 2} +{'type': 'loss', 'content': 0.01583327353000641, 'timestamp': '2025-09-10 02:44:25.605490', 'step': 3376, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:44:25.658433', 'step': 3376, 'epoch': 2} +{'type': 'loss', 'content': 0.002258200664073229, 'timestamp': '2025-09-10 02:44:25.660470', 'step': 3377, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:44:25.713128', 'step': 3377, 'epoch': 2} +{'type': 'loss', 'content': 0.015727082267403603, 'timestamp': '2025-09-10 02:44:25.714902', 'step': 3378, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:44:25.767641', 'step': 3378, 'epoch': 2} +{'type': 'loss', 'content': 0.023107485845685005, 'timestamp': '2025-09-10 02:44:25.769829', 'step': 3379, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:44:25.824492', 'step': 3379, 'epoch': 2} +{'type': 'loss', 'content': 0.011981871910393238, 'timestamp': '2025-09-10 02:44:25.835050', 'step': 3380, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-09-10 02:44:25.895341', 'step': 3380, 'epoch': 2} +{'type': 'loss', 'content': 0.010925479233264923, 'timestamp': '2025-09-10 02:44:25.907390', 'step': 3381, 'epoch': 2} +{'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 736], 'batch_size': 8, 'flops': 14691612894976}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10859018244352}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 624], 'batch_size': 8, 'flops': 12455932682112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11497784019456}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8623338031488}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8623338031488}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10859018244352}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 656], 'batch_size': 8, 'flops': 13094698457216}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 560], 'batch_size': 8, 'flops': 11178401131904}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11497784019456}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 1168], 'batch_size': 8, 'flops': 23314950858880}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 640], 'batch_size': 8, 'flops': 12775315569664}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [6, 208], 'batch_size': 8, 'flops': 4151977605760}], 'timestamp': '2025-09-10 02:44:42.755425', 'step': 3381, 'epoch': 2} +{'type': 'pplx', 'content': 22882422.552492164, 'timestamp': '2025-09-10 02:44:42.758122', 'step': 3381, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-09-10 02:44:42.824424', 'step': 3381, 'epoch': 2} +{'type': 'loss', 'content': 0.003781441133469343, 'timestamp': '2025-09-10 02:44:42.837000', 'step': 3382, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:44:42.895383', 'step': 3382, 'epoch': 2} +{'type': 'loss', 'content': 0.014598295092582703, 'timestamp': '2025-09-10 02:44:42.905814', 'step': 3383, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:44:42.960357', 'step': 3383, 'epoch': 2} +{'type': 'loss', 'content': 0.004385597538203001, 'timestamp': '2025-09-10 02:44:42.970554', 'step': 3384, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-09-10 02:44:43.030679', 'step': 3384, 'epoch': 2} +{'type': 'loss', 'content': 0.004374785348773003, 'timestamp': '2025-09-10 02:44:43.042659', 'step': 3385, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:44:43.096292', 'step': 3385, 'epoch': 2} +{'type': 'loss', 'content': 0.0020921386312693357, 'timestamp': '2025-09-10 02:44:43.104092', 'step': 3386, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:44:43.157423', 'step': 3386, 'epoch': 2} +{'type': 'loss', 'content': 0.02318628691136837, 'timestamp': '2025-09-10 02:44:43.163432', 'step': 3387, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:44:43.220104', 'step': 3387, 'epoch': 2} +{'type': 'loss', 'content': 0.004269985016435385, 'timestamp': '2025-09-10 02:44:43.226129', 'step': 3388, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:44:43.282653', 'step': 3388, 'epoch': 2} +{'type': 'loss', 'content': 0.007775088306516409, 'timestamp': '2025-09-10 02:44:43.286014', 'step': 3389, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:44:43.341450', 'step': 3389, 'epoch': 2} +{'type': 'loss', 'content': 0.005463188048452139, 'timestamp': '2025-09-10 02:44:43.343579', 'step': 3390, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:44:43.410243', 'step': 3390, 'epoch': 2} +{'type': 'loss', 'content': 0.012385325506329536, 'timestamp': '2025-09-10 02:44:43.418600', 'step': 3391, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:44:43.475393', 'step': 3391, 'epoch': 2} +{'type': 'loss', 'content': 0.0016082149231806397, 'timestamp': '2025-09-10 02:44:43.482021', 'step': 3392, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:44:43.536110', 'step': 3392, 'epoch': 2} +{'type': 'loss', 'content': 0.016975795850157738, 'timestamp': '2025-09-10 02:44:43.538330', 'step': 3393, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:44:43.594918', 'step': 3393, 'epoch': 2} +{'type': 'loss', 'content': 0.019571630284190178, 'timestamp': '2025-09-10 02:44:43.604689', 'step': 3394, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:44:43.660007', 'step': 3394, 'epoch': 2} +{'type': 'loss', 'content': 0.001042350078932941, 'timestamp': '2025-09-10 02:44:43.662076', 'step': 3395, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:44:43.715154', 'step': 3395, 'epoch': 2} +{'type': 'loss', 'content': 0.00979469995945692, 'timestamp': '2025-09-10 02:44:43.726065', 'step': 3396, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:44:43.782445', 'step': 3396, 'epoch': 2} +{'type': 'loss', 'content': 0.018953384831547737, 'timestamp': '2025-09-10 02:44:43.792214', 'step': 3397, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-09-10 02:44:43.860776', 'step': 3397, 'epoch': 2} +{'type': 'loss', 'content': 0.004509978927671909, 'timestamp': '2025-09-10 02:44:43.871836', 'step': 3398, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:44:43.926224', 'step': 3398, 'epoch': 2} +{'type': 'loss', 'content': 0.006286496762186289, 'timestamp': '2025-09-10 02:44:43.933066', 'step': 3399, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:44:43.995215', 'step': 3399, 'epoch': 2} +{'type': 'loss', 'content': 0.012036184780299664, 'timestamp': '2025-09-10 02:44:44.001091', 'step': 3400, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:44:44.054376', 'step': 3400, 'epoch': 2} +{'type': 'loss', 'content': 0.002427857369184494, 'timestamp': '2025-09-10 02:44:44.056852', 'step': 3401, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:44:44.112975', 'step': 3401, 'epoch': 2} +{'type': 'loss', 'content': 0.00120683538261801, 'timestamp': '2025-09-10 02:44:44.122368', 'step': 3402, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-09-10 02:44:44.191450', 'step': 3402, 'epoch': 2} +{'type': 'loss', 'content': 0.0018053905805572867, 'timestamp': '2025-09-10 02:44:44.204187', 'step': 3403, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:44:44.258302', 'step': 3403, 'epoch': 2} +{'type': 'loss', 'content': 0.0008580326684750617, 'timestamp': '2025-09-10 02:44:44.264074', 'step': 3404, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:44:44.319006', 'step': 3404, 'epoch': 2} +{'type': 'loss', 'content': 0.045827168971300125, 'timestamp': '2025-09-10 02:44:44.321021', 'step': 3405, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:44:44.374769', 'step': 3405, 'epoch': 2} +{'type': 'loss', 'content': 0.007903196848928928, 'timestamp': '2025-09-10 02:44:44.384334', 'step': 3406, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:44:44.438333', 'step': 3406, 'epoch': 2} +{'type': 'loss', 'content': 0.015009711496531963, 'timestamp': '2025-09-10 02:44:44.469175', 'step': 3407, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:44:44.534255', 'step': 3407, 'epoch': 2} +{'type': 'loss', 'content': 0.03113992139697075, 'timestamp': '2025-09-10 02:44:44.542313', 'step': 3408, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:44:44.603550', 'step': 3408, 'epoch': 2} +{'type': 'loss', 'content': 0.007063610944896936, 'timestamp': '2025-09-10 02:44:44.614726', 'step': 3409, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:44:44.669277', 'step': 3409, 'epoch': 2} +{'type': 'loss', 'content': 0.01674896851181984, 'timestamp': '2025-09-10 02:44:44.671400', 'step': 3410, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:44:44.729239', 'step': 3410, 'epoch': 2} +{'type': 'loss', 'content': 0.00911670457571745, 'timestamp': '2025-09-10 02:44:44.739679', 'step': 3411, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:44:44.794340', 'step': 3411, 'epoch': 2} +{'type': 'loss', 'content': 0.006235290318727493, 'timestamp': '2025-09-10 02:44:44.801648', 'step': 3412, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:44:44.857328', 'step': 3412, 'epoch': 2} +{'type': 'loss', 'content': 0.006860231515020132, 'timestamp': '2025-09-10 02:44:44.860076', 'step': 3413, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:44:44.913896', 'step': 3413, 'epoch': 2} +{'type': 'loss', 'content': 0.009075680747628212, 'timestamp': '2025-09-10 02:44:44.919485', 'step': 3414, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:44:44.979684', 'step': 3414, 'epoch': 2} +{'type': 'loss', 'content': 0.0019099082564935088, 'timestamp': '2025-09-10 02:44:44.987206', 'step': 3415, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:44:45.049015', 'step': 3415, 'epoch': 2} +{'type': 'loss', 'content': 0.010206512175500393, 'timestamp': '2025-09-10 02:44:45.054988', 'step': 3416, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:44:45.107122', 'step': 3416, 'epoch': 2} +{'type': 'loss', 'content': 0.00477524334564805, 'timestamp': '2025-09-10 02:44:45.109927', 'step': 3417, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:44:45.162846', 'step': 3417, 'epoch': 2} +{'type': 'loss', 'content': 0.005614953581243753, 'timestamp': '2025-09-10 02:44:45.165127', 'step': 3418, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:44:45.225446', 'step': 3418, 'epoch': 2} +{'type': 'loss', 'content': 0.003601041389629245, 'timestamp': '2025-09-10 02:44:45.236164', 'step': 3419, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:44:45.289662', 'step': 3419, 'epoch': 2} +{'type': 'loss', 'content': 0.013540426269173622, 'timestamp': '2025-09-10 02:44:45.298536', 'step': 3420, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 480], 'flops': 9600058345344.0}, 'timestamp': '2025-09-10 02:44:45.370197', 'step': 3420, 'epoch': 2} +{'type': 'loss', 'content': 0.005283182021230459, 'timestamp': '2025-09-10 02:44:45.385124', 'step': 3421, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:44:45.448208', 'step': 3421, 'epoch': 2} +{'type': 'loss', 'content': 0.03090631775557995, 'timestamp': '2025-09-10 02:44:45.454068', 'step': 3422, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:44:45.512519', 'step': 3422, 'epoch': 2} +{'type': 'loss', 'content': 0.003905541030690074, 'timestamp': '2025-09-10 02:44:45.523344', 'step': 3423, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:44:45.584422', 'step': 3423, 'epoch': 2} +{'type': 'loss', 'content': 0.010258356109261513, 'timestamp': '2025-09-10 02:44:45.590304', 'step': 3424, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:44:45.646403', 'step': 3424, 'epoch': 2} +{'type': 'loss', 'content': 0.0035311724059283733, 'timestamp': '2025-09-10 02:44:45.652522', 'step': 3425, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:44:45.707375', 'step': 3425, 'epoch': 2} +{'type': 'loss', 'content': 0.008509389124810696, 'timestamp': '2025-09-10 02:44:45.709518', 'step': 3426, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:44:45.763294', 'step': 3426, 'epoch': 2} +{'type': 'loss', 'content': 0.000998670351691544, 'timestamp': '2025-09-10 02:44:45.765471', 'step': 3427, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:44:45.818667', 'step': 3427, 'epoch': 2} +{'type': 'loss', 'content': 0.04099467024207115, 'timestamp': '2025-09-10 02:44:45.826928', 'step': 3428, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:44:45.879584', 'step': 3428, 'epoch': 2} +{'type': 'loss', 'content': 0.01535296905785799, 'timestamp': '2025-09-10 02:44:45.882211', 'step': 3429, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:44:45.935602', 'step': 3429, 'epoch': 2} +{'type': 'loss', 'content': 0.006567057222127914, 'timestamp': '2025-09-10 02:44:45.937730', 'step': 3430, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:44:45.995766', 'step': 3430, 'epoch': 2} +{'type': 'loss', 'content': 0.005960697773844004, 'timestamp': '2025-09-10 02:44:46.001112', 'step': 3431, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:44:46.070051', 'step': 3431, 'epoch': 2} +{'type': 'loss', 'content': 0.020980017259716988, 'timestamp': '2025-09-10 02:44:46.080516', 'step': 3432, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:44:46.133215', 'step': 3432, 'epoch': 2} +{'type': 'loss', 'content': 0.0021771802566945553, 'timestamp': '2025-09-10 02:44:46.137087', 'step': 3433, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:44:46.197699', 'step': 3433, 'epoch': 2} +{'type': 'loss', 'content': 0.0008880245732143521, 'timestamp': '2025-09-10 02:44:46.199779', 'step': 3434, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:44:46.252974', 'step': 3434, 'epoch': 2} +{'type': 'loss', 'content': 0.004372372291982174, 'timestamp': '2025-09-10 02:44:46.255288', 'step': 3435, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:44:46.311548', 'step': 3435, 'epoch': 2} +{'type': 'loss', 'content': 0.0038417575415223837, 'timestamp': '2025-09-10 02:44:46.317659', 'step': 3436, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:44:46.371869', 'step': 3436, 'epoch': 2} +{'type': 'loss', 'content': 0.0010600702371448278, 'timestamp': '2025-09-10 02:44:46.374673', 'step': 3437, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:44:46.427879', 'step': 3437, 'epoch': 2} +{'type': 'loss', 'content': 0.001106555457226932, 'timestamp': '2025-09-10 02:44:46.431913', 'step': 3438, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-09-10 02:44:46.493359', 'step': 3438, 'epoch': 2} +{'type': 'loss', 'content': 0.0005646290956065059, 'timestamp': '2025-09-10 02:44:46.504260', 'step': 3439, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:44:46.559268', 'step': 3439, 'epoch': 2} +{'type': 'loss', 'content': 0.005553652532398701, 'timestamp': '2025-09-10 02:44:46.572149', 'step': 3440, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:44:46.629127', 'step': 3440, 'epoch': 2} +{'type': 'loss', 'content': 0.006433610338717699, 'timestamp': '2025-09-10 02:44:46.637322', 'step': 3441, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:44:46.699838', 'step': 3441, 'epoch': 2} +{'type': 'loss', 'content': 0.0076807462610304356, 'timestamp': '2025-09-10 02:44:46.707516', 'step': 3442, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:44:46.760771', 'step': 3442, 'epoch': 2} +{'type': 'loss', 'content': 0.0005716619198210537, 'timestamp': '2025-09-10 02:44:46.762771', 'step': 3443, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:44:46.816841', 'step': 3443, 'epoch': 2} +{'type': 'loss', 'content': 0.006439946126192808, 'timestamp': '2025-09-10 02:44:46.826682', 'step': 3444, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:44:46.879446', 'step': 3444, 'epoch': 2} +{'type': 'loss', 'content': 0.0033580020535737276, 'timestamp': '2025-09-10 02:44:46.886017', 'step': 3445, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:44:46.940775', 'step': 3445, 'epoch': 2} +{'type': 'loss', 'content': 0.006483376491814852, 'timestamp': '2025-09-10 02:44:46.944873', 'step': 3446, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:44:46.999588', 'step': 3446, 'epoch': 2} +{'type': 'loss', 'content': 0.0033562954049557447, 'timestamp': '2025-09-10 02:44:47.007313', 'step': 3447, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:44:47.061791', 'step': 3447, 'epoch': 2} +{'type': 'loss', 'content': 0.0017835830803960562, 'timestamp': '2025-09-10 02:44:47.067395', 'step': 3448, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:44:47.121574', 'step': 3448, 'epoch': 2} +{'type': 'loss', 'content': 0.003997988998889923, 'timestamp': '2025-09-10 02:44:47.129626', 'step': 3449, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:44:47.185941', 'step': 3449, 'epoch': 2} +{'type': 'loss', 'content': 0.004992308560758829, 'timestamp': '2025-09-10 02:44:47.195520', 'step': 3450, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:44:47.248617', 'step': 3450, 'epoch': 2} +{'type': 'loss', 'content': 0.021727483719587326, 'timestamp': '2025-09-10 02:44:47.250792', 'step': 3451, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:44:47.304195', 'step': 3451, 'epoch': 2} +{'type': 'loss', 'content': 0.028200071305036545, 'timestamp': '2025-09-10 02:44:47.311212', 'step': 3452, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:44:47.364206', 'step': 3452, 'epoch': 2} +{'type': 'loss', 'content': 0.013704189099371433, 'timestamp': '2025-09-10 02:44:47.366419', 'step': 3453, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:44:47.420911', 'step': 3453, 'epoch': 2} +{'type': 'loss', 'content': 0.0029978693928569555, 'timestamp': '2025-09-10 02:44:47.423004', 'step': 3454, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:44:47.477389', 'step': 3454, 'epoch': 2} +{'type': 'loss', 'content': 0.004155627451837063, 'timestamp': '2025-09-10 02:44:47.487193', 'step': 3455, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-09-10 02:44:47.554016', 'step': 3455, 'epoch': 2} +{'type': 'loss', 'content': 0.00584846455603838, 'timestamp': '2025-09-10 02:44:47.567018', 'step': 3456, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:44:47.620407', 'step': 3456, 'epoch': 2} +{'type': 'loss', 'content': 0.0005385205149650574, 'timestamp': '2025-09-10 02:44:47.626074', 'step': 3457, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:44:47.679844', 'step': 3457, 'epoch': 2} +{'type': 'loss', 'content': 0.0034869180526584387, 'timestamp': '2025-09-10 02:44:47.682685', 'step': 3458, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:44:47.735416', 'step': 3458, 'epoch': 2} +{'type': 'loss', 'content': 0.028078759089112282, 'timestamp': '2025-09-10 02:44:47.738327', 'step': 3459, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:44:47.796901', 'step': 3459, 'epoch': 2} +{'type': 'loss', 'content': 0.001354985754005611, 'timestamp': '2025-09-10 02:44:47.808132', 'step': 3460, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:44:47.861000', 'step': 3460, 'epoch': 2} +{'type': 'loss', 'content': 0.005084726959466934, 'timestamp': '2025-09-10 02:44:47.867405', 'step': 3461, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:44:47.921930', 'step': 3461, 'epoch': 2} +{'type': 'loss', 'content': 0.0004792199470102787, 'timestamp': '2025-09-10 02:44:47.924011', 'step': 3462, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:44:47.981300', 'step': 3462, 'epoch': 2} +{'type': 'loss', 'content': 0.0016709257615730166, 'timestamp': '2025-09-10 02:44:47.983586', 'step': 3463, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:44:48.044187', 'step': 3463, 'epoch': 2} +{'type': 'loss', 'content': 0.01289287954568863, 'timestamp': '2025-09-10 02:44:48.055684', 'step': 3464, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:44:48.108568', 'step': 3464, 'epoch': 2} +{'type': 'loss', 'content': 0.002130532404407859, 'timestamp': '2025-09-10 02:44:48.118606', 'step': 3465, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:44:48.172349', 'step': 3465, 'epoch': 2} +{'type': 'loss', 'content': 0.025333818048238754, 'timestamp': '2025-09-10 02:44:48.174604', 'step': 3466, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:44:48.228450', 'step': 3466, 'epoch': 2} +{'type': 'loss', 'content': 0.0005801789229735732, 'timestamp': '2025-09-10 02:44:48.231182', 'step': 3467, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:44:48.283882', 'step': 3467, 'epoch': 2} +{'type': 'loss', 'content': 0.0009072088869288564, 'timestamp': '2025-09-10 02:44:48.289506', 'step': 3468, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:44:48.350774', 'step': 3468, 'epoch': 2} +{'type': 'loss', 'content': 0.016404151916503906, 'timestamp': '2025-09-10 02:44:48.352932', 'step': 3469, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:44:48.409303', 'step': 3469, 'epoch': 2} +{'type': 'loss', 'content': 0.00040816678665578365, 'timestamp': '2025-09-10 02:44:48.411601', 'step': 3470, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:44:48.465079', 'step': 3470, 'epoch': 2} +{'type': 'loss', 'content': 0.0018683952512219548, 'timestamp': '2025-09-10 02:44:48.467181', 'step': 3471, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:44:48.521728', 'step': 3471, 'epoch': 2} +{'type': 'loss', 'content': 0.008625198155641556, 'timestamp': '2025-09-10 02:44:48.527598', 'step': 3472, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:44:48.583419', 'step': 3472, 'epoch': 2} +{'type': 'loss', 'content': 0.001849312917329371, 'timestamp': '2025-09-10 02:44:48.586223', 'step': 3473, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:44:48.639672', 'step': 3473, 'epoch': 2} +{'type': 'loss', 'content': 0.0027316075284034014, 'timestamp': '2025-09-10 02:44:48.641860', 'step': 3474, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:44:48.694349', 'step': 3474, 'epoch': 2} +{'type': 'loss', 'content': 0.012463848106563091, 'timestamp': '2025-09-10 02:44:48.699869', 'step': 3475, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:44:48.753550', 'step': 3475, 'epoch': 2} +{'type': 'loss', 'content': 0.006688945926725864, 'timestamp': '2025-09-10 02:44:48.759401', 'step': 3476, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:44:48.812837', 'step': 3476, 'epoch': 2} +{'type': 'loss', 'content': 0.010269487276673317, 'timestamp': '2025-09-10 02:44:48.815566', 'step': 3477, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:44:48.872709', 'step': 3477, 'epoch': 2} +{'type': 'loss', 'content': 0.000829762895591557, 'timestamp': '2025-09-10 02:44:48.874958', 'step': 3478, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:44:48.931600', 'step': 3478, 'epoch': 2} +{'type': 'loss', 'content': 0.0018849809421226382, 'timestamp': '2025-09-10 02:44:48.935036', 'step': 3479, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:44:48.992175', 'step': 3479, 'epoch': 2} +{'type': 'loss', 'content': 0.003088541328907013, 'timestamp': '2025-09-10 02:44:48.997892', 'step': 3480, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:44:49.050406', 'step': 3480, 'epoch': 2} +{'type': 'loss', 'content': 0.0005800743238069117, 'timestamp': '2025-09-10 02:44:49.053368', 'step': 3481, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:44:49.114091', 'step': 3481, 'epoch': 2} +{'type': 'loss', 'content': 0.01149090938270092, 'timestamp': '2025-09-10 02:44:49.124522', 'step': 3482, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:44:49.178394', 'step': 3482, 'epoch': 2} +{'type': 'loss', 'content': 0.0018863646546378732, 'timestamp': '2025-09-10 02:44:49.182068', 'step': 3483, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:44:49.236428', 'step': 3483, 'epoch': 2} +{'type': 'loss', 'content': 0.007635973393917084, 'timestamp': '2025-09-10 02:44:49.242502', 'step': 3484, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:44:49.299186', 'step': 3484, 'epoch': 2} +{'type': 'loss', 'content': 0.001805212115868926, 'timestamp': '2025-09-10 02:44:49.301698', 'step': 3485, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:44:49.360240', 'step': 3485, 'epoch': 2} +{'type': 'loss', 'content': 0.000615614524576813, 'timestamp': '2025-09-10 02:44:49.370653', 'step': 3486, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:44:49.424694', 'step': 3486, 'epoch': 2} +{'type': 'loss', 'content': 0.00028936678427271545, 'timestamp': '2025-09-10 02:44:49.427213', 'step': 3487, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:44:49.481065', 'step': 3487, 'epoch': 2} +{'type': 'loss', 'content': 0.001837940071709454, 'timestamp': '2025-09-10 02:44:49.487841', 'step': 3488, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:44:49.540330', 'step': 3488, 'epoch': 2} +{'type': 'loss', 'content': 0.0010537052294239402, 'timestamp': '2025-09-10 02:44:49.542667', 'step': 3489, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:44:49.597692', 'step': 3489, 'epoch': 2} +{'type': 'loss', 'content': 0.011933451518416405, 'timestamp': '2025-09-10 02:44:49.599957', 'step': 3490, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:44:49.653285', 'step': 3490, 'epoch': 2} +{'type': 'loss', 'content': 0.0005520405829884112, 'timestamp': '2025-09-10 02:44:49.655803', 'step': 3491, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:44:49.709211', 'step': 3491, 'epoch': 2} +{'type': 'loss', 'content': 0.008819623850286007, 'timestamp': '2025-09-10 02:44:49.715476', 'step': 3492, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:44:49.768897', 'step': 3492, 'epoch': 2} +{'type': 'loss', 'content': 0.0011324294609948993, 'timestamp': '2025-09-10 02:44:49.771123', 'step': 3493, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-09-10 02:44:49.838908', 'step': 3493, 'epoch': 2} +{'type': 'loss', 'content': 0.0023660731967538595, 'timestamp': '2025-09-10 02:44:49.851160', 'step': 3494, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:44:49.904710', 'step': 3494, 'epoch': 2} +{'type': 'loss', 'content': 0.003250342095270753, 'timestamp': '2025-09-10 02:44:49.906866', 'step': 3495, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:44:49.967618', 'step': 3495, 'epoch': 2} +{'type': 'loss', 'content': 0.0021458996925503016, 'timestamp': '2025-09-10 02:44:49.979073', 'step': 3496, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:44:50.032929', 'step': 3496, 'epoch': 2} +{'type': 'loss', 'content': 0.007626359350979328, 'timestamp': '2025-09-10 02:44:50.035275', 'step': 3497, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:44:50.090242', 'step': 3497, 'epoch': 2} +{'type': 'loss', 'content': 0.000988470739684999, 'timestamp': '2025-09-10 02:44:50.094040', 'step': 3498, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:44:50.148569', 'step': 3498, 'epoch': 2} +{'type': 'loss', 'content': 0.0017117972020059824, 'timestamp': '2025-09-10 02:44:50.156294', 'step': 3499, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:44:50.211487', 'step': 3499, 'epoch': 2} +{'type': 'loss', 'content': 0.000862152490299195, 'timestamp': '2025-09-10 02:44:50.217483', 'step': 3500, 'epoch': 2} +{'type': 'info', 'content': 'Checkpoint saved at step 3500', 'timestamp': '2025-09-10 02:44:50.787634', 'step': 3500, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:44:50.845482', 'step': 3500, 'epoch': 2} +{'type': 'loss', 'content': 0.004377846140414476, 'timestamp': '2025-09-10 02:44:50.847830', 'step': 3501, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:44:50.902552', 'step': 3501, 'epoch': 2} +{'type': 'loss', 'content': 0.0002573088859207928, 'timestamp': '2025-09-10 02:44:50.906285', 'step': 3502, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:44:50.961306', 'step': 3502, 'epoch': 2} +{'type': 'loss', 'content': 0.0006194744491949677, 'timestamp': '2025-09-10 02:44:50.963941', 'step': 3503, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:44:51.017320', 'step': 3503, 'epoch': 2} +{'type': 'loss', 'content': 0.0004087744455318898, 'timestamp': '2025-09-10 02:44:51.023700', 'step': 3504, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:44:51.077018', 'step': 3504, 'epoch': 2} +{'type': 'loss', 'content': 0.011638117954134941, 'timestamp': '2025-09-10 02:44:51.079324', 'step': 3505, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:44:51.132963', 'step': 3505, 'epoch': 2} +{'type': 'loss', 'content': 0.03122425079345703, 'timestamp': '2025-09-10 02:44:51.140657', 'step': 3506, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:44:51.199885', 'step': 3506, 'epoch': 2} +{'type': 'loss', 'content': 0.0004915146855637431, 'timestamp': '2025-09-10 02:44:51.206998', 'step': 3507, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:44:51.261317', 'step': 3507, 'epoch': 2} +{'type': 'loss', 'content': 0.00593670504167676, 'timestamp': '2025-09-10 02:44:51.267679', 'step': 3508, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:44:51.321084', 'step': 3508, 'epoch': 2} +{'type': 'loss', 'content': 0.02698325738310814, 'timestamp': '2025-09-10 02:44:51.331046', 'step': 3509, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:44:51.385448', 'step': 3509, 'epoch': 2} +{'type': 'loss', 'content': 0.0024509276263415813, 'timestamp': '2025-09-10 02:44:51.387884', 'step': 3510, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:44:51.444406', 'step': 3510, 'epoch': 2} +{'type': 'loss', 'content': 0.0005396420019678771, 'timestamp': '2025-09-10 02:44:51.446557', 'step': 3511, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:44:51.500453', 'step': 3511, 'epoch': 2} +{'type': 'loss', 'content': 0.0002677281736396253, 'timestamp': '2025-09-10 02:44:51.507089', 'step': 3512, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:44:51.559675', 'step': 3512, 'epoch': 2} +{'type': 'loss', 'content': 0.004991500172764063, 'timestamp': '2025-09-10 02:44:51.562439', 'step': 3513, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:44:51.615892', 'step': 3513, 'epoch': 2} +{'type': 'loss', 'content': 0.012850449420511723, 'timestamp': '2025-09-10 02:44:51.618148', 'step': 3514, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:44:51.671017', 'step': 3514, 'epoch': 2} +{'type': 'loss', 'content': 0.0006575720617547631, 'timestamp': '2025-09-10 02:44:51.677304', 'step': 3515, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:44:51.731690', 'step': 3515, 'epoch': 2} +{'type': 'loss', 'content': 0.003706204006448388, 'timestamp': '2025-09-10 02:44:51.737858', 'step': 3516, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:44:51.792789', 'step': 3516, 'epoch': 2} +{'type': 'loss', 'content': 0.012624149210751057, 'timestamp': '2025-09-10 02:44:51.795155', 'step': 3517, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:44:51.853890', 'step': 3517, 'epoch': 2} +{'type': 'loss', 'content': 0.006648301612585783, 'timestamp': '2025-09-10 02:44:51.860464', 'step': 3518, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:44:51.915028', 'step': 3518, 'epoch': 2} +{'type': 'loss', 'content': 0.0007761465385556221, 'timestamp': '2025-09-10 02:44:51.918558', 'step': 3519, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:44:51.973498', 'step': 3519, 'epoch': 2} +{'type': 'loss', 'content': 0.003467746078968048, 'timestamp': '2025-09-10 02:44:51.979739', 'step': 3520, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:44:52.036546', 'step': 3520, 'epoch': 2} +{'type': 'loss', 'content': 0.00293541862629354, 'timestamp': '2025-09-10 02:44:52.047368', 'step': 3521, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:44:52.104036', 'step': 3521, 'epoch': 2} +{'type': 'loss', 'content': 0.005550007801502943, 'timestamp': '2025-09-10 02:44:52.115549', 'step': 3522, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:44:52.179522', 'step': 3522, 'epoch': 2} +{'type': 'loss', 'content': 0.00021561107132583857, 'timestamp': '2025-09-10 02:44:52.184028', 'step': 3523, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:44:52.242056', 'step': 3523, 'epoch': 2} +{'type': 'loss', 'content': 0.0006978377350606024, 'timestamp': '2025-09-10 02:44:52.248473', 'step': 3524, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:44:52.303006', 'step': 3524, 'epoch': 2} +{'type': 'loss', 'content': 0.0013564244145527482, 'timestamp': '2025-09-10 02:44:52.309278', 'step': 3525, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-09-10 02:44:52.380588', 'step': 3525, 'epoch': 2} +{'type': 'loss', 'content': 0.032836150377988815, 'timestamp': '2025-09-10 02:44:52.393113', 'step': 3526, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:44:52.449221', 'step': 3526, 'epoch': 2} +{'type': 'loss', 'content': 0.002782290568575263, 'timestamp': '2025-09-10 02:44:52.451742', 'step': 3527, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:44:52.511967', 'step': 3527, 'epoch': 2} +{'type': 'loss', 'content': 0.00013729430793318897, 'timestamp': '2025-09-10 02:44:52.518088', 'step': 3528, 'epoch': 2} +{'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 736], 'batch_size': 8, 'flops': 14691612894976}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10859018244352}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 624], 'batch_size': 8, 'flops': 12455932682112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11497784019456}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8623338031488}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8623338031488}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10859018244352}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 656], 'batch_size': 8, 'flops': 13094698457216}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 560], 'batch_size': 8, 'flops': 11178401131904}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11497784019456}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 1168], 'batch_size': 8, 'flops': 23314950858880}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 640], 'batch_size': 8, 'flops': 12775315569664}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [6, 208], 'batch_size': 8, 'flops': 4151977605760}], 'timestamp': '2025-09-10 02:45:09.573242', 'step': 3528, 'epoch': 2} +{'type': 'pplx', 'content': 24521115.23953575, 'timestamp': '2025-09-10 02:45:09.576298', 'step': 3528, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:45:09.630361', 'step': 3528, 'epoch': 2} +{'type': 'loss', 'content': 0.00030652660643681884, 'timestamp': '2025-09-10 02:45:09.637749', 'step': 3529, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:45:09.691625', 'step': 3529, 'epoch': 2} +{'type': 'loss', 'content': 0.004556176718324423, 'timestamp': '2025-09-10 02:45:09.699512', 'step': 3530, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:45:09.752614', 'step': 3530, 'epoch': 2} +{'type': 'loss', 'content': 0.004627283196896315, 'timestamp': '2025-09-10 02:45:09.754627', 'step': 3531, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-09-10 02:45:09.823178', 'step': 3531, 'epoch': 2} +{'type': 'loss', 'content': 0.03526419773697853, 'timestamp': '2025-09-10 02:45:09.836629', 'step': 3532, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:45:09.889713', 'step': 3532, 'epoch': 2} +{'type': 'loss', 'content': 6.712974573019892e-05, 'timestamp': '2025-09-10 02:45:09.897880', 'step': 3533, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:45:09.953065', 'step': 3533, 'epoch': 2} +{'type': 'loss', 'content': 0.004412380047142506, 'timestamp': '2025-09-10 02:45:09.962890', 'step': 3534, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:45:10.016148', 'step': 3534, 'epoch': 2} +{'type': 'loss', 'content': 0.0020845436956733465, 'timestamp': '2025-09-10 02:45:10.018263', 'step': 3535, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:45:10.071208', 'step': 3535, 'epoch': 2} +{'type': 'loss', 'content': 0.00018430438649374992, 'timestamp': '2025-09-10 02:45:10.077137', 'step': 3536, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:45:10.130207', 'step': 3536, 'epoch': 2} +{'type': 'loss', 'content': 0.012220273725688457, 'timestamp': '2025-09-10 02:45:10.132348', 'step': 3537, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:45:10.184665', 'step': 3537, 'epoch': 2} +{'type': 'loss', 'content': 0.0004874151200056076, 'timestamp': '2025-09-10 02:45:10.186950', 'step': 3538, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 512], 'flops': 10240062230528.0}, 'timestamp': '2025-09-10 02:45:10.265697', 'step': 3538, 'epoch': 2} +{'type': 'loss', 'content': 0.0016656635561957955, 'timestamp': '2025-09-10 02:45:10.279786', 'step': 3539, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:45:10.333829', 'step': 3539, 'epoch': 2} +{'type': 'loss', 'content': 0.0016235330840572715, 'timestamp': '2025-09-10 02:45:10.342267', 'step': 3540, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:45:10.394760', 'step': 3540, 'epoch': 2} +{'type': 'loss', 'content': 0.00011918076052097604, 'timestamp': '2025-09-10 02:45:10.397132', 'step': 3541, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:45:10.450126', 'step': 3541, 'epoch': 2} +{'type': 'loss', 'content': 0.02568671479821205, 'timestamp': '2025-09-10 02:45:10.452372', 'step': 3542, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:45:10.506330', 'step': 3542, 'epoch': 2} +{'type': 'loss', 'content': 0.002293122699484229, 'timestamp': '2025-09-10 02:45:10.508719', 'step': 3543, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:45:10.561548', 'step': 3543, 'epoch': 2} +{'type': 'loss', 'content': 0.0008784132078289986, 'timestamp': '2025-09-10 02:45:10.570604', 'step': 3544, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:45:10.623075', 'step': 3544, 'epoch': 2} +{'type': 'loss', 'content': 0.0007506661349907517, 'timestamp': '2025-09-10 02:45:10.625353', 'step': 3545, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:45:10.677990', 'step': 3545, 'epoch': 2} +{'type': 'loss', 'content': 0.006393445190042257, 'timestamp': '2025-09-10 02:45:10.680135', 'step': 3546, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:45:10.733672', 'step': 3546, 'epoch': 2} +{'type': 'loss', 'content': 0.021343769505620003, 'timestamp': '2025-09-10 02:45:10.735907', 'step': 3547, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:45:10.789439', 'step': 3547, 'epoch': 2} +{'type': 'loss', 'content': 0.006848170887678862, 'timestamp': '2025-09-10 02:45:10.799871', 'step': 3548, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:45:10.852318', 'step': 3548, 'epoch': 2} +{'type': 'loss', 'content': 0.00032945116981863976, 'timestamp': '2025-09-10 02:45:10.854549', 'step': 3549, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:45:10.912666', 'step': 3549, 'epoch': 2} +{'type': 'loss', 'content': 0.0009864980820566416, 'timestamp': '2025-09-10 02:45:10.923039', 'step': 3550, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:45:10.975456', 'step': 3550, 'epoch': 2} +{'type': 'loss', 'content': 0.04976826161146164, 'timestamp': '2025-09-10 02:45:10.978569', 'step': 3551, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 624], 'flops': 12480075828672.0}, 'timestamp': '2025-09-10 02:45:11.070594', 'step': 3551, 'epoch': 2} +{'type': 'loss', 'content': 0.008849497884511948, 'timestamp': '2025-09-10 02:45:11.088764', 'step': 3552, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:45:11.142442', 'step': 3552, 'epoch': 2} +{'type': 'loss', 'content': 0.0006036148406565189, 'timestamp': '2025-09-10 02:45:11.150241', 'step': 3553, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:45:11.203588', 'step': 3553, 'epoch': 2} +{'type': 'loss', 'content': 0.002904871478676796, 'timestamp': '2025-09-10 02:45:11.206597', 'step': 3554, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-09-10 02:45:11.273280', 'step': 3554, 'epoch': 2} +{'type': 'loss', 'content': 0.0050770253874361515, 'timestamp': '2025-09-10 02:45:11.285550', 'step': 3555, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:45:11.339959', 'step': 3555, 'epoch': 2} +{'type': 'loss', 'content': 0.0019360597943887115, 'timestamp': '2025-09-10 02:45:11.346137', 'step': 3556, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:45:11.399311', 'step': 3556, 'epoch': 2} +{'type': 'loss', 'content': 0.013183295726776123, 'timestamp': '2025-09-10 02:45:11.401534', 'step': 3557, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:45:11.460011', 'step': 3557, 'epoch': 2} +{'type': 'loss', 'content': 0.0019304242450743914, 'timestamp': '2025-09-10 02:45:11.470432', 'step': 3558, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 464], 'flops': 9280056402752.0}, 'timestamp': '2025-09-10 02:45:11.543646', 'step': 3558, 'epoch': 2} +{'type': 'loss', 'content': 0.00984844658523798, 'timestamp': '2025-09-10 02:45:11.557090', 'step': 3559, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-09-10 02:45:11.618937', 'step': 3559, 'epoch': 2} +{'type': 'loss', 'content': 0.0002027039008680731, 'timestamp': '2025-09-10 02:45:11.630752', 'step': 3560, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:45:11.683828', 'step': 3560, 'epoch': 2} +{'type': 'loss', 'content': 0.0009412588551640511, 'timestamp': '2025-09-10 02:45:11.685867', 'step': 3561, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:45:11.739615', 'step': 3561, 'epoch': 2} +{'type': 'loss', 'content': 0.010938641615211964, 'timestamp': '2025-09-10 02:45:11.749256', 'step': 3562, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:45:11.802379', 'step': 3562, 'epoch': 2} +{'type': 'loss', 'content': 0.003617644775658846, 'timestamp': '2025-09-10 02:45:11.810498', 'step': 3563, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:45:11.863791', 'step': 3563, 'epoch': 2} +{'type': 'loss', 'content': 0.002326021669432521, 'timestamp': '2025-09-10 02:45:11.872807', 'step': 3564, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:45:11.926073', 'step': 3564, 'epoch': 2} +{'type': 'loss', 'content': 0.03195502609014511, 'timestamp': '2025-09-10 02:45:11.935961', 'step': 3565, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:45:11.989375', 'step': 3565, 'epoch': 2} +{'type': 'loss', 'content': 0.0029527253936976194, 'timestamp': '2025-09-10 02:45:11.992089', 'step': 3566, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:45:12.046463', 'step': 3566, 'epoch': 2} +{'type': 'loss', 'content': 0.019422734156250954, 'timestamp': '2025-09-10 02:45:12.048667', 'step': 3567, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:45:12.101685', 'step': 3567, 'epoch': 2} +{'type': 'loss', 'content': 0.007054646499454975, 'timestamp': '2025-09-10 02:45:12.108639', 'step': 3568, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:45:12.161690', 'step': 3568, 'epoch': 2} +{'type': 'loss', 'content': 0.001168784569017589, 'timestamp': '2025-09-10 02:45:12.163834', 'step': 3569, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:45:12.217371', 'step': 3569, 'epoch': 2} +{'type': 'loss', 'content': 0.00021309501607902348, 'timestamp': '2025-09-10 02:45:12.219932', 'step': 3570, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:45:12.273387', 'step': 3570, 'epoch': 2} +{'type': 'loss', 'content': 0.017772700637578964, 'timestamp': '2025-09-10 02:45:12.275570', 'step': 3571, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:45:12.329048', 'step': 3571, 'epoch': 2} +{'type': 'loss', 'content': 0.002913970500230789, 'timestamp': '2025-09-10 02:45:12.336302', 'step': 3572, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:45:12.389161', 'step': 3572, 'epoch': 2} +{'type': 'loss', 'content': 0.0015646052779629827, 'timestamp': '2025-09-10 02:45:12.395692', 'step': 3573, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:45:12.448434', 'step': 3573, 'epoch': 2} +{'type': 'loss', 'content': 0.014464369043707848, 'timestamp': '2025-09-10 02:45:12.450580', 'step': 3574, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:45:12.504138', 'step': 3574, 'epoch': 2} +{'type': 'loss', 'content': 0.01656530052423477, 'timestamp': '2025-09-10 02:45:12.506250', 'step': 3575, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:45:12.559547', 'step': 3575, 'epoch': 2} +{'type': 'loss', 'content': 0.004240933805704117, 'timestamp': '2025-09-10 02:45:12.565601', 'step': 3576, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:45:12.618594', 'step': 3576, 'epoch': 2} +{'type': 'loss', 'content': 0.0010632125195115805, 'timestamp': '2025-09-10 02:45:12.621065', 'step': 3577, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:45:12.673376', 'step': 3577, 'epoch': 2} +{'type': 'loss', 'content': 0.0021287850104272366, 'timestamp': '2025-09-10 02:45:12.679163', 'step': 3578, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:45:12.732078', 'step': 3578, 'epoch': 2} +{'type': 'loss', 'content': 0.0008523418800905347, 'timestamp': '2025-09-10 02:45:12.739942', 'step': 3579, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:45:12.795580', 'step': 3579, 'epoch': 2} +{'type': 'loss', 'content': 0.01762009598314762, 'timestamp': '2025-09-10 02:45:12.801404', 'step': 3580, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:45:12.854437', 'step': 3580, 'epoch': 2} +{'type': 'loss', 'content': 0.06263189762830734, 'timestamp': '2025-09-10 02:45:12.864692', 'step': 3581, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:45:12.917980', 'step': 3581, 'epoch': 2} +{'type': 'loss', 'content': 0.0008568783523514867, 'timestamp': '2025-09-10 02:45:12.920336', 'step': 3582, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:45:12.974470', 'step': 3582, 'epoch': 2} +{'type': 'loss', 'content': 0.018838342279195786, 'timestamp': '2025-09-10 02:45:12.984069', 'step': 3583, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:45:13.037941', 'step': 3583, 'epoch': 2} +{'type': 'loss', 'content': 0.021039584651589394, 'timestamp': '2025-09-10 02:45:13.043950', 'step': 3584, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:45:13.096182', 'step': 3584, 'epoch': 2} +{'type': 'loss', 'content': 0.0011109011247754097, 'timestamp': '2025-09-10 02:45:13.098546', 'step': 3585, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:45:13.151599', 'step': 3585, 'epoch': 2} +{'type': 'loss', 'content': 0.03372924029827118, 'timestamp': '2025-09-10 02:45:13.158151', 'step': 3586, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:45:13.211424', 'step': 3586, 'epoch': 2} +{'type': 'loss', 'content': 0.0005949471378698945, 'timestamp': '2025-09-10 02:45:13.217568', 'step': 3587, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:45:13.273695', 'step': 3587, 'epoch': 2} +{'type': 'loss', 'content': 0.004848685581237078, 'timestamp': '2025-09-10 02:45:13.279554', 'step': 3588, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:45:13.339505', 'step': 3588, 'epoch': 2} +{'type': 'loss', 'content': 0.00045782726374454796, 'timestamp': '2025-09-10 02:45:13.349798', 'step': 3589, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:45:13.402301', 'step': 3589, 'epoch': 2} +{'type': 'loss', 'content': 0.0029668339993804693, 'timestamp': '2025-09-10 02:45:13.406944', 'step': 3590, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:45:13.460620', 'step': 3590, 'epoch': 2} +{'type': 'loss', 'content': 0.010383923538029194, 'timestamp': '2025-09-10 02:45:13.463180', 'step': 3591, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:45:13.515713', 'step': 3591, 'epoch': 2} +{'type': 'loss', 'content': 0.03740331903100014, 'timestamp': '2025-09-10 02:45:13.521625', 'step': 3592, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:45:13.573808', 'step': 3592, 'epoch': 2} +{'type': 'loss', 'content': 0.0005096433451399207, 'timestamp': '2025-09-10 02:45:13.576009', 'step': 3593, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:45:13.628991', 'step': 3593, 'epoch': 2} +{'type': 'loss', 'content': 0.017126796767115593, 'timestamp': '2025-09-10 02:45:13.635114', 'step': 3594, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:45:13.691161', 'step': 3594, 'epoch': 2} +{'type': 'loss', 'content': 0.004927968140691519, 'timestamp': '2025-09-10 02:45:13.700817', 'step': 3595, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:45:13.753685', 'step': 3595, 'epoch': 2} +{'type': 'loss', 'content': 0.008813808672130108, 'timestamp': '2025-09-10 02:45:13.762569', 'step': 3596, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:45:13.817946', 'step': 3596, 'epoch': 2} +{'type': 'loss', 'content': 0.003934493288397789, 'timestamp': '2025-09-10 02:45:13.819994', 'step': 3597, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:45:13.872973', 'step': 3597, 'epoch': 2} +{'type': 'loss', 'content': 0.004867472220212221, 'timestamp': '2025-09-10 02:45:13.876126', 'step': 3598, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:45:13.929551', 'step': 3598, 'epoch': 2} +{'type': 'loss', 'content': 0.0005675621214322746, 'timestamp': '2025-09-10 02:45:13.931672', 'step': 3599, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:45:13.984364', 'step': 3599, 'epoch': 2} +{'type': 'loss', 'content': 0.002587629482150078, 'timestamp': '2025-09-10 02:45:13.990055', 'step': 3600, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:45:14.042114', 'step': 3600, 'epoch': 2} +{'type': 'loss', 'content': 0.0005651320680044591, 'timestamp': '2025-09-10 02:45:14.045181', 'step': 3601, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:45:14.097750', 'step': 3601, 'epoch': 2} +{'type': 'loss', 'content': 0.04643111675977707, 'timestamp': '2025-09-10 02:45:14.099942', 'step': 3602, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:45:14.152929', 'step': 3602, 'epoch': 2} +{'type': 'loss', 'content': 0.0174267441034317, 'timestamp': '2025-09-10 02:45:14.155320', 'step': 3603, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:45:14.208661', 'step': 3603, 'epoch': 2} +{'type': 'loss', 'content': 0.028429338708519936, 'timestamp': '2025-09-10 02:45:14.214521', 'step': 3604, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:45:14.267199', 'step': 3604, 'epoch': 2} +{'type': 'loss', 'content': 0.0009339532698504627, 'timestamp': '2025-09-10 02:45:14.270166', 'step': 3605, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:45:14.323200', 'step': 3605, 'epoch': 2} +{'type': 'loss', 'content': 0.03557131811976433, 'timestamp': '2025-09-10 02:45:14.325657', 'step': 3606, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-09-10 02:45:14.392043', 'step': 3606, 'epoch': 2} +{'type': 'loss', 'content': 0.009677738882601261, 'timestamp': '2025-09-10 02:45:14.404269', 'step': 3607, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:45:14.457780', 'step': 3607, 'epoch': 2} +{'type': 'loss', 'content': 0.006198307033628225, 'timestamp': '2025-09-10 02:45:14.464069', 'step': 3608, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:45:14.519005', 'step': 3608, 'epoch': 2} +{'type': 'loss', 'content': 0.0026686161290854216, 'timestamp': '2025-09-10 02:45:14.521278', 'step': 3609, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:45:14.575101', 'step': 3609, 'epoch': 2} +{'type': 'loss', 'content': 0.0034833138342946768, 'timestamp': '2025-09-10 02:45:14.584759', 'step': 3610, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:45:14.638442', 'step': 3610, 'epoch': 2} +{'type': 'loss', 'content': 0.003976595588028431, 'timestamp': '2025-09-10 02:45:14.644863', 'step': 3611, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:45:14.698197', 'step': 3611, 'epoch': 2} +{'type': 'loss', 'content': 0.0007812803378328681, 'timestamp': '2025-09-10 02:45:14.704319', 'step': 3612, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:45:14.756140', 'step': 3612, 'epoch': 2} +{'type': 'loss', 'content': 0.002657910343259573, 'timestamp': '2025-09-10 02:45:14.758589', 'step': 3613, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:45:14.811908', 'step': 3613, 'epoch': 2} +{'type': 'loss', 'content': 0.002885453635826707, 'timestamp': '2025-09-10 02:45:14.814703', 'step': 3614, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:45:14.867594', 'step': 3614, 'epoch': 2} +{'type': 'loss', 'content': 0.021188566461205482, 'timestamp': '2025-09-10 02:45:14.869947', 'step': 3615, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:45:14.923025', 'step': 3615, 'epoch': 2} +{'type': 'loss', 'content': 0.017414916306734085, 'timestamp': '2025-09-10 02:45:14.929304', 'step': 3616, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:45:14.981346', 'step': 3616, 'epoch': 2} +{'type': 'loss', 'content': 0.009675375185906887, 'timestamp': '2025-09-10 02:45:14.984522', 'step': 3617, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:45:15.042464', 'step': 3617, 'epoch': 2} +{'type': 'loss', 'content': 0.009258276782929897, 'timestamp': '2025-09-10 02:45:15.052965', 'step': 3618, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-09-10 02:45:15.121195', 'step': 3618, 'epoch': 2} +{'type': 'loss', 'content': 0.007249352987855673, 'timestamp': '2025-09-10 02:45:15.133793', 'step': 3619, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:45:15.187443', 'step': 3619, 'epoch': 2} +{'type': 'loss', 'content': 0.0013473917497321963, 'timestamp': '2025-09-10 02:45:15.195954', 'step': 3620, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:45:15.248699', 'step': 3620, 'epoch': 2} +{'type': 'loss', 'content': 0.0028069086838513613, 'timestamp': '2025-09-10 02:45:15.251023', 'step': 3621, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:45:15.303580', 'step': 3621, 'epoch': 2} +{'type': 'loss', 'content': 0.00933842547237873, 'timestamp': '2025-09-10 02:45:15.306025', 'step': 3622, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:45:15.358644', 'step': 3622, 'epoch': 2} +{'type': 'loss', 'content': 0.0156002938747406, 'timestamp': '2025-09-10 02:45:15.366824', 'step': 3623, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:45:15.421466', 'step': 3623, 'epoch': 2} +{'type': 'loss', 'content': 0.035478200763463974, 'timestamp': '2025-09-10 02:45:15.432061', 'step': 3624, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:45:15.484344', 'step': 3624, 'epoch': 2} +{'type': 'loss', 'content': 0.0023858884815126657, 'timestamp': '2025-09-10 02:45:15.491088', 'step': 3625, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:45:15.545633', 'step': 3625, 'epoch': 2} +{'type': 'loss', 'content': 0.016410350799560547, 'timestamp': '2025-09-10 02:45:15.555399', 'step': 3626, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:45:15.608929', 'step': 3626, 'epoch': 2} +{'type': 'loss', 'content': 0.003903862088918686, 'timestamp': '2025-09-10 02:45:15.611167', 'step': 3627, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:45:15.663781', 'step': 3627, 'epoch': 2} +{'type': 'loss', 'content': 0.014552460983395576, 'timestamp': '2025-09-10 02:45:15.672507', 'step': 3628, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 512], 'flops': 10240062230528.0}, 'timestamp': '2025-09-10 02:45:15.745971', 'step': 3628, 'epoch': 2} +{'type': 'loss', 'content': 0.00588188087567687, 'timestamp': '2025-09-10 02:45:15.761358', 'step': 3629, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-09-10 02:45:15.827682', 'step': 3629, 'epoch': 2} +{'type': 'loss', 'content': 0.006514329928904772, 'timestamp': '2025-09-10 02:45:15.839894', 'step': 3630, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:45:15.892939', 'step': 3630, 'epoch': 2} +{'type': 'loss', 'content': 0.008370401337742805, 'timestamp': '2025-09-10 02:45:15.895136', 'step': 3631, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:45:15.948156', 'step': 3631, 'epoch': 2} +{'type': 'loss', 'content': 0.0004211624327581376, 'timestamp': '2025-09-10 02:45:15.954185', 'step': 3632, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:45:16.006366', 'step': 3632, 'epoch': 2} +{'type': 'loss', 'content': 0.004571163561195135, 'timestamp': '2025-09-10 02:45:16.008761', 'step': 3633, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:45:16.061433', 'step': 3633, 'epoch': 2} +{'type': 'loss', 'content': 0.010175072588026524, 'timestamp': '2025-09-10 02:45:16.063787', 'step': 3634, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:45:16.117367', 'step': 3634, 'epoch': 2} +{'type': 'loss', 'content': 0.02154644951224327, 'timestamp': '2025-09-10 02:45:16.125217', 'step': 3635, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:45:16.178167', 'step': 3635, 'epoch': 2} +{'type': 'loss', 'content': 0.0037484881468117237, 'timestamp': '2025-09-10 02:45:16.184067', 'step': 3636, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:45:16.236185', 'step': 3636, 'epoch': 2} +{'type': 'loss', 'content': 0.010430445894598961, 'timestamp': '2025-09-10 02:45:16.238484', 'step': 3637, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:45:16.290988', 'step': 3637, 'epoch': 2} +{'type': 'loss', 'content': 0.004492948763072491, 'timestamp': '2025-09-10 02:45:16.293104', 'step': 3638, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:45:16.346793', 'step': 3638, 'epoch': 2} +{'type': 'loss', 'content': 0.005959161091595888, 'timestamp': '2025-09-10 02:45:16.352937', 'step': 3639, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-09-10 02:45:16.413891', 'step': 3639, 'epoch': 2} +{'type': 'loss', 'content': 0.005888893734663725, 'timestamp': '2025-09-10 02:45:16.425474', 'step': 3640, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:45:16.478219', 'step': 3640, 'epoch': 2} +{'type': 'loss', 'content': 0.00891589093953371, 'timestamp': '2025-09-10 02:45:16.480442', 'step': 3641, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:45:16.533795', 'step': 3641, 'epoch': 2} +{'type': 'loss', 'content': 0.01220616977661848, 'timestamp': '2025-09-10 02:45:16.536034', 'step': 3642, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:45:16.589644', 'step': 3642, 'epoch': 2} +{'type': 'loss', 'content': 0.002008298644796014, 'timestamp': '2025-09-10 02:45:16.599238', 'step': 3643, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:45:16.654071', 'step': 3643, 'epoch': 2} +{'type': 'loss', 'content': 0.0019713356159627438, 'timestamp': '2025-09-10 02:45:16.664483', 'step': 3644, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:45:16.717903', 'step': 3644, 'epoch': 2} +{'type': 'loss', 'content': 0.019930541515350342, 'timestamp': '2025-09-10 02:45:16.728366', 'step': 3645, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:45:16.786545', 'step': 3645, 'epoch': 2} +{'type': 'loss', 'content': 0.006152032408863306, 'timestamp': '2025-09-10 02:45:16.796967', 'step': 3646, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:45:16.849773', 'step': 3646, 'epoch': 2} +{'type': 'loss', 'content': 0.013385327532887459, 'timestamp': '2025-09-10 02:45:16.851880', 'step': 3647, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:45:16.904691', 'step': 3647, 'epoch': 2} +{'type': 'loss', 'content': 0.0009315438219346106, 'timestamp': '2025-09-10 02:45:16.910569', 'step': 3648, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:45:16.963162', 'step': 3648, 'epoch': 2} +{'type': 'loss', 'content': 0.0020077070221304893, 'timestamp': '2025-09-10 02:45:16.965506', 'step': 3649, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:45:17.018565', 'step': 3649, 'epoch': 2} +{'type': 'loss', 'content': 0.00979316420853138, 'timestamp': '2025-09-10 02:45:17.024866', 'step': 3650, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:45:17.077952', 'step': 3650, 'epoch': 2} +{'type': 'loss', 'content': 0.0014990640338510275, 'timestamp': '2025-09-10 02:45:17.086139', 'step': 3651, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:45:17.139225', 'step': 3651, 'epoch': 2} +{'type': 'loss', 'content': 0.008185057900846004, 'timestamp': '2025-09-10 02:45:17.145130', 'step': 3652, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:45:17.197044', 'step': 3652, 'epoch': 2} +{'type': 'loss', 'content': 0.009565098211169243, 'timestamp': '2025-09-10 02:45:17.200214', 'step': 3653, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-09-10 02:45:17.260615', 'step': 3653, 'epoch': 2} +{'type': 'loss', 'content': 0.018558982759714127, 'timestamp': '2025-09-10 02:45:17.271581', 'step': 3654, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:45:17.325300', 'step': 3654, 'epoch': 2} +{'type': 'loss', 'content': 0.00753264594823122, 'timestamp': '2025-09-10 02:45:17.332886', 'step': 3655, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:45:17.385948', 'step': 3655, 'epoch': 2} +{'type': 'loss', 'content': 0.008826500736176968, 'timestamp': '2025-09-10 02:45:17.391916', 'step': 3656, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:45:17.444259', 'step': 3656, 'epoch': 2} +{'type': 'loss', 'content': 0.00726616894826293, 'timestamp': '2025-09-10 02:45:17.446446', 'step': 3657, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:45:17.499952', 'step': 3657, 'epoch': 2} +{'type': 'loss', 'content': 0.024102849885821342, 'timestamp': '2025-09-10 02:45:17.509527', 'step': 3658, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 464], 'flops': 9280056402752.0}, 'timestamp': '2025-09-10 02:45:17.582428', 'step': 3658, 'epoch': 2} +{'type': 'loss', 'content': 0.0023528921883553267, 'timestamp': '2025-09-10 02:45:17.595887', 'step': 3659, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:45:17.648919', 'step': 3659, 'epoch': 2} +{'type': 'loss', 'content': 0.003015550086274743, 'timestamp': '2025-09-10 02:45:17.654789', 'step': 3660, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:45:17.707230', 'step': 3660, 'epoch': 2} +{'type': 'loss', 'content': 0.001021600211970508, 'timestamp': '2025-09-10 02:45:17.709620', 'step': 3661, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-09-10 02:45:17.778384', 'step': 3661, 'epoch': 2} +{'type': 'loss', 'content': 0.009700418449938297, 'timestamp': '2025-09-10 02:45:17.790983', 'step': 3662, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:45:17.844412', 'step': 3662, 'epoch': 2} +{'type': 'loss', 'content': 0.022200671955943108, 'timestamp': '2025-09-10 02:45:17.846898', 'step': 3663, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:45:17.900032', 'step': 3663, 'epoch': 2} +{'type': 'loss', 'content': 0.0033406191505491734, 'timestamp': '2025-09-10 02:45:17.907410', 'step': 3664, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:45:17.964074', 'step': 3664, 'epoch': 2} +{'type': 'loss', 'content': 0.022803468629717827, 'timestamp': '2025-09-10 02:45:17.975292', 'step': 3665, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:45:18.028638', 'step': 3665, 'epoch': 2} +{'type': 'loss', 'content': 0.02626602165400982, 'timestamp': '2025-09-10 02:45:18.031776', 'step': 3666, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:45:18.084942', 'step': 3666, 'epoch': 2} +{'type': 'loss', 'content': 0.0030524933245033026, 'timestamp': '2025-09-10 02:45:18.087389', 'step': 3667, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:45:18.140785', 'step': 3667, 'epoch': 2} +{'type': 'loss', 'content': 0.0135736595839262, 'timestamp': '2025-09-10 02:45:18.146898', 'step': 3668, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:45:18.199669', 'step': 3668, 'epoch': 2} +{'type': 'loss', 'content': 0.007685056421905756, 'timestamp': '2025-09-10 02:45:18.206054', 'step': 3669, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:45:18.258771', 'step': 3669, 'epoch': 2} +{'type': 'loss', 'content': 0.0008941399282775819, 'timestamp': '2025-09-10 02:45:18.266819', 'step': 3670, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:45:18.320651', 'step': 3670, 'epoch': 2} +{'type': 'loss', 'content': 0.011505060829222202, 'timestamp': '2025-09-10 02:45:18.330266', 'step': 3671, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:45:18.382886', 'step': 3671, 'epoch': 2} +{'type': 'loss', 'content': 0.012330555357038975, 'timestamp': '2025-09-10 02:45:18.389204', 'step': 3672, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:45:18.441719', 'step': 3672, 'epoch': 2} +{'type': 'loss', 'content': 0.002355178352445364, 'timestamp': '2025-09-10 02:45:18.449872', 'step': 3673, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-09-10 02:45:18.511259', 'step': 3673, 'epoch': 2} +{'type': 'loss', 'content': 0.00227890582755208, 'timestamp': '2025-09-10 02:45:18.522150', 'step': 3674, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-09-10 02:45:18.591381', 'step': 3674, 'epoch': 2} +{'type': 'loss', 'content': 0.0014533958164975047, 'timestamp': '2025-09-10 02:45:18.604101', 'step': 3675, 'epoch': 2} +{'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 736], 'batch_size': 8, 'flops': 14691612894976}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10859018244352}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 624], 'batch_size': 8, 'flops': 12455932682112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11497784019456}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8623338031488}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8623338031488}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10859018244352}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 656], 'batch_size': 8, 'flops': 13094698457216}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 560], 'batch_size': 8, 'flops': 11178401131904}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11497784019456}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 1168], 'batch_size': 8, 'flops': 23314950858880}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 640], 'batch_size': 8, 'flops': 12775315569664}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [6, 208], 'batch_size': 8, 'flops': 4151977605760}], 'timestamp': '2025-09-10 02:45:35.711353', 'step': 3675, 'epoch': 2} +{'type': 'pplx', 'content': 22935921.714725554, 'timestamp': '2025-09-10 02:45:35.714393', 'step': 3675, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:45:35.769472', 'step': 3675, 'epoch': 2} +{'type': 'loss', 'content': 0.011157740838825703, 'timestamp': '2025-09-10 02:45:35.776224', 'step': 3676, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:45:35.829385', 'step': 3676, 'epoch': 2} +{'type': 'loss', 'content': 0.00046526786172762513, 'timestamp': '2025-09-10 02:45:35.839682', 'step': 3677, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:45:35.902823', 'step': 3677, 'epoch': 2} +{'type': 'loss', 'content': 0.0038929337169975042, 'timestamp': '2025-09-10 02:45:35.905737', 'step': 3678, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:45:35.961464', 'step': 3678, 'epoch': 2} +{'type': 'loss', 'content': 0.003663030220195651, 'timestamp': '2025-09-10 02:45:35.964070', 'step': 3679, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:45:36.018933', 'step': 3679, 'epoch': 2} +{'type': 'loss', 'content': 0.011066590435802937, 'timestamp': '2025-09-10 02:45:36.029476', 'step': 3680, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:45:36.086316', 'step': 3680, 'epoch': 2} +{'type': 'loss', 'content': 0.007109819445759058, 'timestamp': '2025-09-10 02:45:36.097557', 'step': 3681, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:45:36.156804', 'step': 3681, 'epoch': 2} +{'type': 'loss', 'content': 0.00759152602404356, 'timestamp': '2025-09-10 02:45:36.166398', 'step': 3682, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:45:36.226098', 'step': 3682, 'epoch': 2} +{'type': 'loss', 'content': 0.0007620376418344676, 'timestamp': '2025-09-10 02:45:36.234200', 'step': 3683, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:45:36.298154', 'step': 3683, 'epoch': 2} +{'type': 'loss', 'content': 0.012494848109781742, 'timestamp': '2025-09-10 02:45:36.309385', 'step': 3684, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:45:36.362293', 'step': 3684, 'epoch': 2} +{'type': 'loss', 'content': 0.0016487921820953488, 'timestamp': '2025-09-10 02:45:36.365100', 'step': 3685, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:45:36.420309', 'step': 3685, 'epoch': 2} +{'type': 'loss', 'content': 0.020231228321790695, 'timestamp': '2025-09-10 02:45:36.422381', 'step': 3686, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:45:36.481834', 'step': 3686, 'epoch': 2} +{'type': 'loss', 'content': 0.0037350484635680914, 'timestamp': '2025-09-10 02:45:36.491667', 'step': 3687, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:45:36.554933', 'step': 3687, 'epoch': 2} +{'type': 'loss', 'content': 0.01404801569879055, 'timestamp': '2025-09-10 02:45:36.566083', 'step': 3688, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:45:36.620397', 'step': 3688, 'epoch': 2} +{'type': 'loss', 'content': 0.003894281107932329, 'timestamp': '2025-09-10 02:45:36.623177', 'step': 3689, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:45:36.677284', 'step': 3689, 'epoch': 2} +{'type': 'loss', 'content': 0.01687435619533062, 'timestamp': '2025-09-10 02:45:36.686884', 'step': 3690, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:45:36.751189', 'step': 3690, 'epoch': 2} +{'type': 'loss', 'content': 0.0015043719904497266, 'timestamp': '2025-09-10 02:45:36.758861', 'step': 3691, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:45:36.819505', 'step': 3691, 'epoch': 2} +{'type': 'loss', 'content': 0.008676859550178051, 'timestamp': '2025-09-10 02:45:36.831056', 'step': 3692, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:45:36.888265', 'step': 3692, 'epoch': 2} +{'type': 'loss', 'content': 0.0006987787783145905, 'timestamp': '2025-09-10 02:45:36.899461', 'step': 3693, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:45:36.953272', 'step': 3693, 'epoch': 2} +{'type': 'loss', 'content': 0.0005440797540359199, 'timestamp': '2025-09-10 02:45:36.955488', 'step': 3694, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:45:37.014134', 'step': 3694, 'epoch': 2} +{'type': 'loss', 'content': 0.002616985933855176, 'timestamp': '2025-09-10 02:45:37.016654', 'step': 3695, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:45:37.070318', 'step': 3695, 'epoch': 2} +{'type': 'loss', 'content': 0.009445978328585625, 'timestamp': '2025-09-10 02:45:37.079400', 'step': 3696, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:45:37.135162', 'step': 3696, 'epoch': 2} +{'type': 'loss', 'content': 0.0026948130689561367, 'timestamp': '2025-09-10 02:45:37.144957', 'step': 3697, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:45:37.202055', 'step': 3697, 'epoch': 2} +{'type': 'loss', 'content': 0.00022185503621585667, 'timestamp': '2025-09-10 02:45:37.208582', 'step': 3698, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:45:37.266790', 'step': 3698, 'epoch': 2} +{'type': 'loss', 'content': 0.0001651530183153227, 'timestamp': '2025-09-10 02:45:37.273152', 'step': 3699, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:45:37.331222', 'step': 3699, 'epoch': 2} +{'type': 'loss', 'content': 0.005250777117908001, 'timestamp': '2025-09-10 02:45:37.337387', 'step': 3700, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:45:37.389778', 'step': 3700, 'epoch': 2} +{'type': 'loss', 'content': 0.0019424583297222853, 'timestamp': '2025-09-10 02:45:37.399900', 'step': 3701, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:45:37.453969', 'step': 3701, 'epoch': 2} +{'type': 'loss', 'content': 0.02517259307205677, 'timestamp': '2025-09-10 02:45:37.456175', 'step': 3702, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:45:37.508951', 'step': 3702, 'epoch': 2} +{'type': 'loss', 'content': 0.001004486344754696, 'timestamp': '2025-09-10 02:45:37.517004', 'step': 3703, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:45:37.570698', 'step': 3703, 'epoch': 2} +{'type': 'loss', 'content': 0.0002498834510333836, 'timestamp': '2025-09-10 02:45:37.576629', 'step': 3704, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:45:37.633599', 'step': 3704, 'epoch': 2} +{'type': 'loss', 'content': 0.00645799282938242, 'timestamp': '2025-09-10 02:45:37.637769', 'step': 3705, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:45:37.691615', 'step': 3705, 'epoch': 2} +{'type': 'loss', 'content': 6.749193562427536e-05, 'timestamp': '2025-09-10 02:45:37.698338', 'step': 3706, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:45:37.751783', 'step': 3706, 'epoch': 2} +{'type': 'loss', 'content': 0.0017861025407910347, 'timestamp': '2025-09-10 02:45:37.754224', 'step': 3707, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:45:37.812588', 'step': 3707, 'epoch': 2} +{'type': 'loss', 'content': 0.029708804562687874, 'timestamp': '2025-09-10 02:45:37.823148', 'step': 3708, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-09-10 02:45:37.882790', 'step': 3708, 'epoch': 2} +{'type': 'loss', 'content': 0.005415987689048052, 'timestamp': '2025-09-10 02:45:37.894599', 'step': 3709, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:45:37.954552', 'step': 3709, 'epoch': 2} +{'type': 'loss', 'content': 0.0009763292619027197, 'timestamp': '2025-09-10 02:45:37.964355', 'step': 3710, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:45:38.019279', 'step': 3710, 'epoch': 2} +{'type': 'loss', 'content': 0.001070567755959928, 'timestamp': '2025-09-10 02:45:38.025526', 'step': 3711, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:45:38.079475', 'step': 3711, 'epoch': 2} +{'type': 'loss', 'content': 0.004329554736614227, 'timestamp': '2025-09-10 02:45:38.085389', 'step': 3712, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:45:38.141646', 'step': 3712, 'epoch': 2} +{'type': 'loss', 'content': 0.05482960492372513, 'timestamp': '2025-09-10 02:45:38.144905', 'step': 3713, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-09-10 02:45:38.209777', 'step': 3713, 'epoch': 2} +{'type': 'loss', 'content': 0.0031996474135667086, 'timestamp': '2025-09-10 02:45:38.220655', 'step': 3714, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:45:38.295670', 'step': 3714, 'epoch': 2} +{'type': 'loss', 'content': 0.00010142551036551595, 'timestamp': '2025-09-10 02:45:38.298686', 'step': 3715, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:45:38.358500', 'step': 3715, 'epoch': 2} +{'type': 'loss', 'content': 0.0022924281656742096, 'timestamp': '2025-09-10 02:45:38.371583', 'step': 3716, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:45:38.430257', 'step': 3716, 'epoch': 2} +{'type': 'loss', 'content': 0.0008062793058343232, 'timestamp': '2025-09-10 02:45:38.432882', 'step': 3717, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:45:38.485815', 'step': 3717, 'epoch': 2} +{'type': 'loss', 'content': 0.0030017667450010777, 'timestamp': '2025-09-10 02:45:38.492381', 'step': 3718, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:45:38.549621', 'step': 3718, 'epoch': 2} +{'type': 'loss', 'content': 0.004409337881952524, 'timestamp': '2025-09-10 02:45:38.559434', 'step': 3719, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:45:38.624674', 'step': 3719, 'epoch': 2} +{'type': 'loss', 'content': 0.022649625316262245, 'timestamp': '2025-09-10 02:45:38.636174', 'step': 3720, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:45:38.692446', 'step': 3720, 'epoch': 2} +{'type': 'loss', 'content': 0.0005659721209667623, 'timestamp': '2025-09-10 02:45:38.698629', 'step': 3721, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-09-10 02:45:38.760940', 'step': 3721, 'epoch': 2} +{'type': 'loss', 'content': 0.0003026895865332335, 'timestamp': '2025-09-10 02:45:38.771826', 'step': 3722, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:45:38.828489', 'step': 3722, 'epoch': 2} +{'type': 'loss', 'content': 0.009049681015312672, 'timestamp': '2025-09-10 02:45:38.831283', 'step': 3723, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:45:38.884178', 'step': 3723, 'epoch': 2} +{'type': 'loss', 'content': 0.02390514314174652, 'timestamp': '2025-09-10 02:45:38.889916', 'step': 3724, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:45:38.942174', 'step': 3724, 'epoch': 2} +{'type': 'loss', 'content': 0.03299976512789726, 'timestamp': '2025-09-10 02:45:38.948707', 'step': 3725, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:45:39.001930', 'step': 3725, 'epoch': 2} +{'type': 'loss', 'content': 0.008155884221196175, 'timestamp': '2025-09-10 02:45:39.009542', 'step': 3726, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:45:39.071247', 'step': 3726, 'epoch': 2} +{'type': 'loss', 'content': 0.023253699764609337, 'timestamp': '2025-09-10 02:45:39.081672', 'step': 3727, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:45:39.134768', 'step': 3727, 'epoch': 2} +{'type': 'loss', 'content': 0.03874696418642998, 'timestamp': '2025-09-10 02:45:39.142201', 'step': 3728, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:45:39.201860', 'step': 3728, 'epoch': 2} +{'type': 'loss', 'content': 0.010049762204289436, 'timestamp': '2025-09-10 02:45:39.213133', 'step': 3729, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:45:39.266938', 'step': 3729, 'epoch': 2} +{'type': 'loss', 'content': 0.000758556998334825, 'timestamp': '2025-09-10 02:45:39.269658', 'step': 3730, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:45:39.322436', 'step': 3730, 'epoch': 2} +{'type': 'loss', 'content': 0.019734015688300133, 'timestamp': '2025-09-10 02:45:39.329067', 'step': 3731, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:45:39.385331', 'step': 3731, 'epoch': 2} +{'type': 'loss', 'content': 0.005894810426980257, 'timestamp': '2025-09-10 02:45:39.391294', 'step': 3732, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:45:39.443673', 'step': 3732, 'epoch': 2} +{'type': 'loss', 'content': 0.0018528493819758296, 'timestamp': '2025-09-10 02:45:39.451913', 'step': 3733, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:45:39.505282', 'step': 3733, 'epoch': 2} +{'type': 'loss', 'content': 0.003908081445842981, 'timestamp': '2025-09-10 02:45:39.507958', 'step': 3734, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:45:39.560945', 'step': 3734, 'epoch': 2} +{'type': 'loss', 'content': 0.0001145283313235268, 'timestamp': '2025-09-10 02:45:39.567493', 'step': 3735, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:45:39.623179', 'step': 3735, 'epoch': 2} +{'type': 'loss', 'content': 0.006715327966958284, 'timestamp': '2025-09-10 02:45:39.628943', 'step': 3736, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 624], 'flops': 12480075828672.0}, 'timestamp': '2025-09-10 02:45:39.718124', 'step': 3736, 'epoch': 2} +{'type': 'loss', 'content': 0.009590490721166134, 'timestamp': '2025-09-10 02:45:39.737129', 'step': 3737, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:45:39.793605', 'step': 3737, 'epoch': 2} +{'type': 'loss', 'content': 0.0028196871280670166, 'timestamp': '2025-09-10 02:45:39.796820', 'step': 3738, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:45:39.854233', 'step': 3738, 'epoch': 2} +{'type': 'loss', 'content': 0.0003894695546478033, 'timestamp': '2025-09-10 02:45:39.856530', 'step': 3739, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 480], 'flops': 9600058345344.0}, 'timestamp': '2025-09-10 02:45:39.941299', 'step': 3739, 'epoch': 2} +{'type': 'loss', 'content': 0.01211837213486433, 'timestamp': '2025-09-10 02:45:39.955798', 'step': 3740, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:45:40.009878', 'step': 3740, 'epoch': 2} +{'type': 'loss', 'content': 0.007021053694188595, 'timestamp': '2025-09-10 02:45:40.020337', 'step': 3741, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:45:40.074695', 'step': 3741, 'epoch': 2} +{'type': 'loss', 'content': 0.0019118876662105322, 'timestamp': '2025-09-10 02:45:40.077108', 'step': 3742, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:45:40.130352', 'step': 3742, 'epoch': 2} +{'type': 'loss', 'content': 0.005367000121623278, 'timestamp': '2025-09-10 02:45:40.132791', 'step': 3743, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:45:40.186009', 'step': 3743, 'epoch': 2} +{'type': 'loss', 'content': 0.006436459254473448, 'timestamp': '2025-09-10 02:45:40.194895', 'step': 3744, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:45:40.247649', 'step': 3744, 'epoch': 2} +{'type': 'loss', 'content': 0.009422756731510162, 'timestamp': '2025-09-10 02:45:40.249828', 'step': 3745, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:45:40.302501', 'step': 3745, 'epoch': 2} +{'type': 'loss', 'content': 0.03521189093589783, 'timestamp': '2025-09-10 02:45:40.304861', 'step': 3746, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:45:40.358705', 'step': 3746, 'epoch': 2} +{'type': 'loss', 'content': 0.0012213027803227305, 'timestamp': '2025-09-10 02:45:40.361080', 'step': 3747, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:45:40.416309', 'step': 3747, 'epoch': 2} +{'type': 'loss', 'content': 0.0006766091100871563, 'timestamp': '2025-09-10 02:45:40.425851', 'step': 3748, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:45:40.478207', 'step': 3748, 'epoch': 2} +{'type': 'loss', 'content': 0.010785062797367573, 'timestamp': '2025-09-10 02:45:40.480601', 'step': 3749, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:45:40.537037', 'step': 3749, 'epoch': 2} +{'type': 'loss', 'content': 0.005762494169175625, 'timestamp': '2025-09-10 02:45:40.539487', 'step': 3750, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:45:40.594461', 'step': 3750, 'epoch': 2} +{'type': 'loss', 'content': 0.008459271863102913, 'timestamp': '2025-09-10 02:45:40.604240', 'step': 3751, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-09-10 02:45:40.679627', 'step': 3751, 'epoch': 2} +{'type': 'loss', 'content': 0.0013220703694969416, 'timestamp': '2025-09-10 02:45:40.692961', 'step': 3752, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:45:40.745888', 'step': 3752, 'epoch': 2} +{'type': 'loss', 'content': 0.007617729250341654, 'timestamp': '2025-09-10 02:45:40.752343', 'step': 3753, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:45:40.805366', 'step': 3753, 'epoch': 2} +{'type': 'loss', 'content': 0.00389980711042881, 'timestamp': '2025-09-10 02:45:40.810834', 'step': 3754, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:45:40.865568', 'step': 3754, 'epoch': 2} +{'type': 'loss', 'content': 0.0048908935859799385, 'timestamp': '2025-09-10 02:45:40.868981', 'step': 3755, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:45:40.928441', 'step': 3755, 'epoch': 2} +{'type': 'loss', 'content': 0.001440156251192093, 'timestamp': '2025-09-10 02:45:40.935601', 'step': 3756, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:45:40.993390', 'step': 3756, 'epoch': 2} +{'type': 'loss', 'content': 0.020689917728304863, 'timestamp': '2025-09-10 02:45:40.995500', 'step': 3757, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:45:41.053234', 'step': 3757, 'epoch': 2} +{'type': 'loss', 'content': 0.0023082138504832983, 'timestamp': '2025-09-10 02:45:41.055476', 'step': 3758, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:45:41.107930', 'step': 3758, 'epoch': 2} +{'type': 'loss', 'content': 0.004035149235278368, 'timestamp': '2025-09-10 02:45:41.111325', 'step': 3759, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:45:41.165716', 'step': 3759, 'epoch': 2} +{'type': 'loss', 'content': 0.02253444492816925, 'timestamp': '2025-09-10 02:45:41.173085', 'step': 3760, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:45:41.227636', 'step': 3760, 'epoch': 2} +{'type': 'loss', 'content': 0.03691099211573601, 'timestamp': '2025-09-10 02:45:41.230024', 'step': 3761, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 608], 'flops': 12160073886080.0}, 'timestamp': '2025-09-10 02:45:41.320609', 'step': 3761, 'epoch': 2} +{'type': 'loss', 'content': 0.024642014876008034, 'timestamp': '2025-09-10 02:45:41.337711', 'step': 3762, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:45:41.395575', 'step': 3762, 'epoch': 2} +{'type': 'loss', 'content': 0.0014374173479154706, 'timestamp': '2025-09-10 02:45:41.398834', 'step': 3763, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:45:41.452457', 'step': 3763, 'epoch': 2} +{'type': 'loss', 'content': 0.005778650287538767, 'timestamp': '2025-09-10 02:45:41.458445', 'step': 3764, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:45:41.511238', 'step': 3764, 'epoch': 2} +{'type': 'loss', 'content': 0.0020793273579329252, 'timestamp': '2025-09-10 02:45:41.513617', 'step': 3765, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:45:41.566775', 'step': 3765, 'epoch': 2} +{'type': 'loss', 'content': 0.01677837036550045, 'timestamp': '2025-09-10 02:45:41.571323', 'step': 3766, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:45:41.624563', 'step': 3766, 'epoch': 2} +{'type': 'loss', 'content': 0.007229361217468977, 'timestamp': '2025-09-10 02:45:41.628254', 'step': 3767, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:45:41.682766', 'step': 3767, 'epoch': 2} +{'type': 'loss', 'content': 0.0087441960349679, 'timestamp': '2025-09-10 02:45:41.688707', 'step': 3768, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:45:41.740627', 'step': 3768, 'epoch': 2} +{'type': 'loss', 'content': 0.04530872777104378, 'timestamp': '2025-09-10 02:45:41.742942', 'step': 3769, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:45:41.799060', 'step': 3769, 'epoch': 2} +{'type': 'loss', 'content': 0.005834842566400766, 'timestamp': '2025-09-10 02:45:41.803187', 'step': 3770, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:45:41.858344', 'step': 3770, 'epoch': 2} +{'type': 'loss', 'content': 0.011207705363631248, 'timestamp': '2025-09-10 02:45:41.861883', 'step': 3771, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:45:41.919413', 'step': 3771, 'epoch': 2} +{'type': 'loss', 'content': 0.003581272903829813, 'timestamp': '2025-09-10 02:45:41.925437', 'step': 3772, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:45:41.979713', 'step': 3772, 'epoch': 2} +{'type': 'loss', 'content': 0.0015710501465946436, 'timestamp': '2025-09-10 02:45:41.989629', 'step': 3773, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:45:42.047535', 'step': 3773, 'epoch': 2} +{'type': 'loss', 'content': 0.007571594323962927, 'timestamp': '2025-09-10 02:45:42.055931', 'step': 3774, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:45:42.115257', 'step': 3774, 'epoch': 2} +{'type': 'loss', 'content': 0.03263309970498085, 'timestamp': '2025-09-10 02:45:42.126131', 'step': 3775, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:45:42.185795', 'step': 3775, 'epoch': 2} +{'type': 'loss', 'content': 0.003294537076726556, 'timestamp': '2025-09-10 02:45:42.199155', 'step': 3776, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:45:42.263580', 'step': 3776, 'epoch': 2} +{'type': 'loss', 'content': 0.0017571768257766962, 'timestamp': '2025-09-10 02:45:42.266076', 'step': 3777, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:45:42.319304', 'step': 3777, 'epoch': 2} +{'type': 'loss', 'content': 0.0004685258027166128, 'timestamp': '2025-09-10 02:45:42.322399', 'step': 3778, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:45:42.377471', 'step': 3778, 'epoch': 2} +{'type': 'loss', 'content': 0.012189905159175396, 'timestamp': '2025-09-10 02:45:42.386406', 'step': 3779, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:45:42.442789', 'step': 3779, 'epoch': 2} +{'type': 'loss', 'content': 0.0740482434630394, 'timestamp': '2025-09-10 02:45:42.448795', 'step': 3780, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:45:42.501518', 'step': 3780, 'epoch': 2} +{'type': 'loss', 'content': 0.01791914366185665, 'timestamp': '2025-09-10 02:45:42.504315', 'step': 3781, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:45:42.558103', 'step': 3781, 'epoch': 2} +{'type': 'loss', 'content': 0.00022058424656279385, 'timestamp': '2025-09-10 02:45:42.560302', 'step': 3782, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:45:42.617485', 'step': 3782, 'epoch': 2} +{'type': 'loss', 'content': 0.0028940639458596706, 'timestamp': '2025-09-10 02:45:42.619812', 'step': 3783, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:45:42.676106', 'step': 3783, 'epoch': 2} +{'type': 'loss', 'content': 0.016884662210941315, 'timestamp': '2025-09-10 02:45:42.686469', 'step': 3784, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:45:42.739204', 'step': 3784, 'epoch': 2} +{'type': 'loss', 'content': 0.005923439748585224, 'timestamp': '2025-09-10 02:45:42.744920', 'step': 3785, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:45:42.802424', 'step': 3785, 'epoch': 2} +{'type': 'loss', 'content': 0.03181489184498787, 'timestamp': '2025-09-10 02:45:42.804669', 'step': 3786, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:45:42.862624', 'step': 3786, 'epoch': 2} +{'type': 'loss', 'content': 0.002827305346727371, 'timestamp': '2025-09-10 02:45:42.864891', 'step': 3787, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 656], 'flops': 13120079713856.0}, 'timestamp': '2025-09-10 02:45:42.961809', 'step': 3787, 'epoch': 2} +{'type': 'loss', 'content': 0.00886689592152834, 'timestamp': '2025-09-10 02:45:42.981137', 'step': 3788, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:45:43.033704', 'step': 3788, 'epoch': 2} +{'type': 'loss', 'content': 0.026850944384932518, 'timestamp': '2025-09-10 02:45:43.035944', 'step': 3789, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:45:43.088915', 'step': 3789, 'epoch': 2} +{'type': 'loss', 'content': 0.030168982222676277, 'timestamp': '2025-09-10 02:45:43.092538', 'step': 3790, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:45:43.146983', 'step': 3790, 'epoch': 2} +{'type': 'loss', 'content': 0.013773827813565731, 'timestamp': '2025-09-10 02:45:43.149626', 'step': 3791, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:45:43.211897', 'step': 3791, 'epoch': 2} +{'type': 'loss', 'content': 0.002505003707483411, 'timestamp': '2025-09-10 02:45:43.220942', 'step': 3792, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:45:43.274547', 'step': 3792, 'epoch': 2} +{'type': 'loss', 'content': 0.002235317835584283, 'timestamp': '2025-09-10 02:45:43.282519', 'step': 3793, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:45:43.338820', 'step': 3793, 'epoch': 2} +{'type': 'loss', 'content': 0.0030649728141725063, 'timestamp': '2025-09-10 02:45:43.341281', 'step': 3794, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:45:43.394450', 'step': 3794, 'epoch': 2} +{'type': 'loss', 'content': 0.008617487736046314, 'timestamp': '2025-09-10 02:45:43.398299', 'step': 3795, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:45:43.456881', 'step': 3795, 'epoch': 2} +{'type': 'loss', 'content': 0.003183516440913081, 'timestamp': '2025-09-10 02:45:43.462835', 'step': 3796, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:45:43.515887', 'step': 3796, 'epoch': 2} +{'type': 'loss', 'content': 0.007166016381233931, 'timestamp': '2025-09-10 02:45:43.522479', 'step': 3797, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:45:43.575669', 'step': 3797, 'epoch': 2} +{'type': 'loss', 'content': 0.0042429291643202305, 'timestamp': '2025-09-10 02:45:43.584056', 'step': 3798, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:45:43.636785', 'step': 3798, 'epoch': 2} +{'type': 'loss', 'content': 0.010450095869600773, 'timestamp': '2025-09-10 02:45:43.641356', 'step': 3799, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:45:43.694493', 'step': 3799, 'epoch': 2} +{'type': 'loss', 'content': 0.010051152668893337, 'timestamp': '2025-09-10 02:45:43.700418', 'step': 3800, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:45:43.752551', 'step': 3800, 'epoch': 2} +{'type': 'loss', 'content': 0.0075791762210428715, 'timestamp': '2025-09-10 02:45:43.757144', 'step': 3801, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:45:43.813354', 'step': 3801, 'epoch': 2} +{'type': 'loss', 'content': 0.0007832504925318062, 'timestamp': '2025-09-10 02:45:43.815666', 'step': 3802, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:45:43.869711', 'step': 3802, 'epoch': 2} +{'type': 'loss', 'content': 0.003694644197821617, 'timestamp': '2025-09-10 02:45:43.876272', 'step': 3803, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:45:43.930776', 'step': 3803, 'epoch': 2} +{'type': 'loss', 'content': 0.006956086959689856, 'timestamp': '2025-09-10 02:45:43.938163', 'step': 3804, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-09-10 02:45:44.004949', 'step': 3804, 'epoch': 2} +{'type': 'loss', 'content': 0.00568874878808856, 'timestamp': '2025-09-10 02:45:44.018605', 'step': 3805, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:45:44.075024', 'step': 3805, 'epoch': 2} +{'type': 'loss', 'content': 0.010045773349702358, 'timestamp': '2025-09-10 02:45:44.084818', 'step': 3806, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:45:44.138394', 'step': 3806, 'epoch': 2} +{'type': 'loss', 'content': 0.0015708133578300476, 'timestamp': '2025-09-10 02:45:44.144663', 'step': 3807, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:45:44.199390', 'step': 3807, 'epoch': 2} +{'type': 'loss', 'content': 0.002105496358126402, 'timestamp': '2025-09-10 02:45:44.208624', 'step': 3808, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:45:44.267140', 'step': 3808, 'epoch': 2} +{'type': 'loss', 'content': 0.014348874799907207, 'timestamp': '2025-09-10 02:45:44.273795', 'step': 3809, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:45:44.328573', 'step': 3809, 'epoch': 2} +{'type': 'loss', 'content': 0.009345331229269505, 'timestamp': '2025-09-10 02:45:44.331411', 'step': 3810, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:45:44.392589', 'step': 3810, 'epoch': 2} +{'type': 'loss', 'content': 0.009542659856379032, 'timestamp': '2025-09-10 02:45:44.402352', 'step': 3811, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:45:44.455971', 'step': 3811, 'epoch': 2} +{'type': 'loss', 'content': 0.021965688094496727, 'timestamp': '2025-09-10 02:45:44.467161', 'step': 3812, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:45:44.520940', 'step': 3812, 'epoch': 2} +{'type': 'loss', 'content': 0.003360508708283305, 'timestamp': '2025-09-10 02:45:44.523979', 'step': 3813, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:45:44.578882', 'step': 3813, 'epoch': 2} +{'type': 'loss', 'content': 0.008107351139187813, 'timestamp': '2025-09-10 02:45:44.587147', 'step': 3814, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:45:44.645119', 'step': 3814, 'epoch': 2} +{'type': 'loss', 'content': 0.003683290909975767, 'timestamp': '2025-09-10 02:45:44.650551', 'step': 3815, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:45:44.707201', 'step': 3815, 'epoch': 2} +{'type': 'loss', 'content': 0.004462660755962133, 'timestamp': '2025-09-10 02:45:44.713191', 'step': 3816, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:45:44.767085', 'step': 3816, 'epoch': 2} +{'type': 'loss', 'content': 0.01923714391887188, 'timestamp': '2025-09-10 02:45:44.769906', 'step': 3817, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:45:44.827118', 'step': 3817, 'epoch': 2} +{'type': 'loss', 'content': 0.0022326426114887, 'timestamp': '2025-09-10 02:45:44.830039', 'step': 3818, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:45:44.887663', 'step': 3818, 'epoch': 2} +{'type': 'loss', 'content': 0.02179141901433468, 'timestamp': '2025-09-10 02:45:44.890588', 'step': 3819, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:45:44.943519', 'step': 3819, 'epoch': 2} +{'type': 'loss', 'content': 0.01936708576977253, 'timestamp': '2025-09-10 02:45:44.952636', 'step': 3820, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:45:45.006058', 'step': 3820, 'epoch': 2} +{'type': 'loss', 'content': 0.002558376407250762, 'timestamp': '2025-09-10 02:45:45.008156', 'step': 3821, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:45:45.065202', 'step': 3821, 'epoch': 2} +{'type': 'loss', 'content': 0.026886161416769028, 'timestamp': '2025-09-10 02:45:45.067497', 'step': 3822, 'epoch': 2} +{'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 736], 'batch_size': 8, 'flops': 14691612894976}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10859018244352}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 624], 'batch_size': 8, 'flops': 12455932682112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11497784019456}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8623338031488}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8623338031488}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10859018244352}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 656], 'batch_size': 8, 'flops': 13094698457216}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 560], 'batch_size': 8, 'flops': 11178401131904}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11497784019456}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 1168], 'batch_size': 8, 'flops': 23314950858880}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 640], 'batch_size': 8, 'flops': 12775315569664}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [6, 208], 'batch_size': 8, 'flops': 4151977605760}], 'timestamp': '2025-09-10 02:46:01.849359', 'step': 3822, 'epoch': 2} +{'type': 'pplx', 'content': 25595522.624360383, 'timestamp': '2025-09-10 02:46:01.852615', 'step': 3822, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 480], 'flops': 9600058345344.0}, 'timestamp': '2025-09-10 02:46:01.925157', 'step': 3822, 'epoch': 2} +{'type': 'loss', 'content': 0.02511192485690117, 'timestamp': '2025-09-10 02:46:01.938790', 'step': 3823, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-09-10 02:46:02.005886', 'step': 3823, 'epoch': 2} +{'type': 'loss', 'content': 0.015108300372958183, 'timestamp': '2025-09-10 02:46:02.018868', 'step': 3824, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:46:02.072955', 'step': 3824, 'epoch': 2} +{'type': 'loss', 'content': 0.001703347428701818, 'timestamp': '2025-09-10 02:46:02.075359', 'step': 3825, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:46:02.130021', 'step': 3825, 'epoch': 2} +{'type': 'loss', 'content': 0.0062110270373523235, 'timestamp': '2025-09-10 02:46:02.139816', 'step': 3826, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 464], 'flops': 9280056402752.0}, 'timestamp': '2025-09-10 02:46:02.213436', 'step': 3826, 'epoch': 2} +{'type': 'loss', 'content': 0.014068561606109142, 'timestamp': '2025-09-10 02:46:02.226946', 'step': 3827, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:46:02.290791', 'step': 3827, 'epoch': 2} +{'type': 'loss', 'content': 0.005498811602592468, 'timestamp': '2025-09-10 02:46:02.301553', 'step': 3828, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 448], 'flops': 8960054460160.0}, 'timestamp': '2025-09-10 02:46:02.372910', 'step': 3828, 'epoch': 2} +{'type': 'loss', 'content': 0.011925933882594109, 'timestamp': '2025-09-10 02:46:02.386954', 'step': 3829, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:46:02.449805', 'step': 3829, 'epoch': 2} +{'type': 'loss', 'content': 0.0025502387434244156, 'timestamp': '2025-09-10 02:46:02.455526', 'step': 3830, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:46:02.523035', 'step': 3830, 'epoch': 2} +{'type': 'loss', 'content': 0.016895176842808723, 'timestamp': '2025-09-10 02:46:02.538564', 'step': 3831, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:46:02.608071', 'step': 3831, 'epoch': 2} +{'type': 'loss', 'content': 0.00885614100843668, 'timestamp': '2025-09-10 02:46:02.621006', 'step': 3832, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:46:02.678964', 'step': 3832, 'epoch': 2} +{'type': 'loss', 'content': 0.010726043954491615, 'timestamp': '2025-09-10 02:46:02.688897', 'step': 3833, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:46:02.745334', 'step': 3833, 'epoch': 2} +{'type': 'loss', 'content': 0.021753041073679924, 'timestamp': '2025-09-10 02:46:02.750667', 'step': 3834, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:46:02.811040', 'step': 3834, 'epoch': 2} +{'type': 'loss', 'content': 0.009483088739216328, 'timestamp': '2025-09-10 02:46:02.820620', 'step': 3835, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:46:02.885099', 'step': 3835, 'epoch': 2} +{'type': 'loss', 'content': 0.008262093178927898, 'timestamp': '2025-09-10 02:46:02.896308', 'step': 3836, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:46:02.960650', 'step': 3836, 'epoch': 2} +{'type': 'loss', 'content': 0.0010429082904011011, 'timestamp': '2025-09-10 02:46:02.970913', 'step': 3837, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:46:03.029441', 'step': 3837, 'epoch': 2} +{'type': 'loss', 'content': 0.005868071224540472, 'timestamp': '2025-09-10 02:46:03.037853', 'step': 3838, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:46:03.098177', 'step': 3838, 'epoch': 2} +{'type': 'loss', 'content': 0.0011152158258482814, 'timestamp': '2025-09-10 02:46:03.106168', 'step': 3839, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:46:03.161348', 'step': 3839, 'epoch': 2} +{'type': 'loss', 'content': 0.000761518080253154, 'timestamp': '2025-09-10 02:46:03.171952', 'step': 3840, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:46:03.225150', 'step': 3840, 'epoch': 2} +{'type': 'loss', 'content': 0.0010029973927885294, 'timestamp': '2025-09-10 02:46:03.227331', 'step': 3841, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:46:03.280202', 'step': 3841, 'epoch': 2} +{'type': 'loss', 'content': 0.00562081765383482, 'timestamp': '2025-09-10 02:46:03.282605', 'step': 3842, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:46:03.335065', 'step': 3842, 'epoch': 2} +{'type': 'loss', 'content': 0.008440883830189705, 'timestamp': '2025-09-10 02:46:03.337208', 'step': 3843, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:46:03.389815', 'step': 3843, 'epoch': 2} +{'type': 'loss', 'content': 0.001560679986141622, 'timestamp': '2025-09-10 02:46:03.396030', 'step': 3844, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:46:03.448404', 'step': 3844, 'epoch': 2} +{'type': 'loss', 'content': 0.00929208192974329, 'timestamp': '2025-09-10 02:46:03.450887', 'step': 3845, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:46:03.503780', 'step': 3845, 'epoch': 2} +{'type': 'loss', 'content': 0.0032838734332472086, 'timestamp': '2025-09-10 02:46:03.506230', 'step': 3846, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-09-10 02:46:03.568100', 'step': 3846, 'epoch': 2} +{'type': 'loss', 'content': 0.0028587591368705034, 'timestamp': '2025-09-10 02:46:03.579207', 'step': 3847, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:46:03.637153', 'step': 3847, 'epoch': 2} +{'type': 'loss', 'content': 0.010549718514084816, 'timestamp': '2025-09-10 02:46:03.643160', 'step': 3848, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:46:03.694868', 'step': 3848, 'epoch': 2} +{'type': 'loss', 'content': 0.005692584905773401, 'timestamp': '2025-09-10 02:46:03.697087', 'step': 3849, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:46:03.749780', 'step': 3849, 'epoch': 2} +{'type': 'loss', 'content': 0.006631883326917887, 'timestamp': '2025-09-10 02:46:03.756406', 'step': 3850, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:46:03.809533', 'step': 3850, 'epoch': 2} +{'type': 'loss', 'content': 0.011357649229466915, 'timestamp': '2025-09-10 02:46:03.812496', 'step': 3851, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:46:03.865762', 'step': 3851, 'epoch': 2} +{'type': 'loss', 'content': 0.023502331227064133, 'timestamp': '2025-09-10 02:46:03.871762', 'step': 3852, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:46:03.923680', 'step': 3852, 'epoch': 2} +{'type': 'loss', 'content': 0.003774035722017288, 'timestamp': '2025-09-10 02:46:03.930325', 'step': 3853, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:46:03.982958', 'step': 3853, 'epoch': 2} +{'type': 'loss', 'content': 0.0032003470696508884, 'timestamp': '2025-09-10 02:46:03.991152', 'step': 3854, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:46:04.044554', 'step': 3854, 'epoch': 2} +{'type': 'loss', 'content': 0.016245054081082344, 'timestamp': '2025-09-10 02:46:04.046844', 'step': 3855, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:46:04.098803', 'step': 3855, 'epoch': 2} +{'type': 'loss', 'content': 0.0008556434186175466, 'timestamp': '2025-09-10 02:46:04.104562', 'step': 3856, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:46:04.157094', 'step': 3856, 'epoch': 2} +{'type': 'loss', 'content': 0.00436136731877923, 'timestamp': '2025-09-10 02:46:04.159454', 'step': 3857, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:46:04.211886', 'step': 3857, 'epoch': 2} +{'type': 'loss', 'content': 0.005428866017609835, 'timestamp': '2025-09-10 02:46:04.214132', 'step': 3858, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:46:04.267266', 'step': 3858, 'epoch': 2} +{'type': 'loss', 'content': 0.02254888415336609, 'timestamp': '2025-09-10 02:46:04.273744', 'step': 3859, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:46:04.327066', 'step': 3859, 'epoch': 2} +{'type': 'loss', 'content': 0.004335789475589991, 'timestamp': '2025-09-10 02:46:04.332997', 'step': 3860, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 480], 'flops': 9600058345344.0}, 'timestamp': '2025-09-10 02:46:04.404872', 'step': 3860, 'epoch': 2} +{'type': 'loss', 'content': 0.001393162994645536, 'timestamp': '2025-09-10 02:46:04.419817', 'step': 3861, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:46:04.472889', 'step': 3861, 'epoch': 2} +{'type': 'loss', 'content': 0.0015901681035757065, 'timestamp': '2025-09-10 02:46:04.475251', 'step': 3862, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:46:04.528536', 'step': 3862, 'epoch': 2} +{'type': 'loss', 'content': 0.010006852447986603, 'timestamp': '2025-09-10 02:46:04.530781', 'step': 3863, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:46:04.583072', 'step': 3863, 'epoch': 2} +{'type': 'loss', 'content': 0.011894852854311466, 'timestamp': '2025-09-10 02:46:04.589042', 'step': 3864, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-09-10 02:46:04.655893', 'step': 3864, 'epoch': 2} +{'type': 'loss', 'content': 0.0011220744345337152, 'timestamp': '2025-09-10 02:46:04.669657', 'step': 3865, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:46:04.722855', 'step': 3865, 'epoch': 2} +{'type': 'loss', 'content': 0.0007972274906933308, 'timestamp': '2025-09-10 02:46:04.730989', 'step': 3866, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:46:04.784136', 'step': 3866, 'epoch': 2} +{'type': 'loss', 'content': 0.008162098936736584, 'timestamp': '2025-09-10 02:46:04.787043', 'step': 3867, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:46:04.840433', 'step': 3867, 'epoch': 2} +{'type': 'loss', 'content': 0.0030370864551514387, 'timestamp': '2025-09-10 02:46:04.846455', 'step': 3868, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:46:04.899994', 'step': 3868, 'epoch': 2} +{'type': 'loss', 'content': 0.00035197677789255977, 'timestamp': '2025-09-10 02:46:04.902163', 'step': 3869, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:46:04.956160', 'step': 3869, 'epoch': 2} +{'type': 'loss', 'content': 0.003160616382956505, 'timestamp': '2025-09-10 02:46:04.958647', 'step': 3870, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:46:05.012049', 'step': 3870, 'epoch': 2} +{'type': 'loss', 'content': 0.00044348399387672544, 'timestamp': '2025-09-10 02:46:05.014302', 'step': 3871, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:46:05.067991', 'step': 3871, 'epoch': 2} +{'type': 'loss', 'content': 5.3746625781059265e-05, 'timestamp': '2025-09-10 02:46:05.074048', 'step': 3872, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:46:05.126657', 'step': 3872, 'epoch': 2} +{'type': 'loss', 'content': 0.009846360422670841, 'timestamp': '2025-09-10 02:46:05.136644', 'step': 3873, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:46:05.190796', 'step': 3873, 'epoch': 2} +{'type': 'loss', 'content': 0.004170811735093594, 'timestamp': '2025-09-10 02:46:05.192983', 'step': 3874, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:46:05.246252', 'step': 3874, 'epoch': 2} +{'type': 'loss', 'content': 0.00024055906396824867, 'timestamp': '2025-09-10 02:46:05.252757', 'step': 3875, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:46:05.311281', 'step': 3875, 'epoch': 2} +{'type': 'loss', 'content': 0.010387812741100788, 'timestamp': '2025-09-10 02:46:05.321914', 'step': 3876, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:46:05.374218', 'step': 3876, 'epoch': 2} +{'type': 'loss', 'content': 0.0006725011044181883, 'timestamp': '2025-09-10 02:46:05.376571', 'step': 3877, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:46:05.429297', 'step': 3877, 'epoch': 2} +{'type': 'loss', 'content': 0.0012642050860449672, 'timestamp': '2025-09-10 02:46:05.432504', 'step': 3878, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:46:05.485635', 'step': 3878, 'epoch': 2} +{'type': 'loss', 'content': 0.003001450328156352, 'timestamp': '2025-09-10 02:46:05.487924', 'step': 3879, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:46:05.540771', 'step': 3879, 'epoch': 2} +{'type': 'loss', 'content': 0.0013148859143257141, 'timestamp': '2025-09-10 02:46:05.546789', 'step': 3880, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:46:05.599418', 'step': 3880, 'epoch': 2} +{'type': 'loss', 'content': 0.0005107710021547973, 'timestamp': '2025-09-10 02:46:05.601789', 'step': 3881, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:46:05.654552', 'step': 3881, 'epoch': 2} +{'type': 'loss', 'content': 0.003785243956372142, 'timestamp': '2025-09-10 02:46:05.657528', 'step': 3882, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:46:05.710094', 'step': 3882, 'epoch': 2} +{'type': 'loss', 'content': 0.00025510616251267493, 'timestamp': '2025-09-10 02:46:05.712386', 'step': 3883, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-09-10 02:46:05.780521', 'step': 3883, 'epoch': 2} +{'type': 'loss', 'content': 0.0003945929929614067, 'timestamp': '2025-09-10 02:46:05.793883', 'step': 3884, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:46:05.846887', 'step': 3884, 'epoch': 2} +{'type': 'loss', 'content': 0.0015666695544496179, 'timestamp': '2025-09-10 02:46:05.857222', 'step': 3885, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:46:05.910584', 'step': 3885, 'epoch': 2} +{'type': 'loss', 'content': 0.00791245698928833, 'timestamp': '2025-09-10 02:46:05.913000', 'step': 3886, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:46:05.966030', 'step': 3886, 'epoch': 2} +{'type': 'loss', 'content': 0.002426649909466505, 'timestamp': '2025-09-10 02:46:05.968352', 'step': 3887, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:46:06.021814', 'step': 3887, 'epoch': 2} +{'type': 'loss', 'content': 0.004155515693128109, 'timestamp': '2025-09-10 02:46:06.030641', 'step': 3888, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:46:06.083987', 'step': 3888, 'epoch': 2} +{'type': 'loss', 'content': 0.01504812203347683, 'timestamp': '2025-09-10 02:46:06.086431', 'step': 3889, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 480], 'flops': 9600058345344.0}, 'timestamp': '2025-09-10 02:46:06.159860', 'step': 3889, 'epoch': 2} +{'type': 'loss', 'content': 0.009594624862074852, 'timestamp': '2025-09-10 02:46:06.173568', 'step': 3890, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:46:06.227312', 'step': 3890, 'epoch': 2} +{'type': 'loss', 'content': 0.0032929691951721907, 'timestamp': '2025-09-10 02:46:06.233415', 'step': 3891, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:46:06.286848', 'step': 3891, 'epoch': 2} +{'type': 'loss', 'content': 0.0026436985936015844, 'timestamp': '2025-09-10 02:46:06.292967', 'step': 3892, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:46:06.345790', 'step': 3892, 'epoch': 2} +{'type': 'loss', 'content': 0.03356518596410751, 'timestamp': '2025-09-10 02:46:06.348113', 'step': 3893, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:46:06.401014', 'step': 3893, 'epoch': 2} +{'type': 'loss', 'content': 0.0003165427187923342, 'timestamp': '2025-09-10 02:46:06.403205', 'step': 3894, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:46:06.456127', 'step': 3894, 'epoch': 2} +{'type': 'loss', 'content': 0.0013966757105663419, 'timestamp': '2025-09-10 02:46:06.458619', 'step': 3895, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:46:06.511417', 'step': 3895, 'epoch': 2} +{'type': 'loss', 'content': 0.0018488811329007149, 'timestamp': '2025-09-10 02:46:06.518867', 'step': 3896, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:46:06.572627', 'step': 3896, 'epoch': 2} +{'type': 'loss', 'content': 0.00148906244430691, 'timestamp': '2025-09-10 02:46:06.575617', 'step': 3897, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:46:06.630487', 'step': 3897, 'epoch': 2} +{'type': 'loss', 'content': 0.01566730998456478, 'timestamp': '2025-09-10 02:46:06.634108', 'step': 3898, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:46:06.689415', 'step': 3898, 'epoch': 2} +{'type': 'loss', 'content': 0.008929139003157616, 'timestamp': '2025-09-10 02:46:06.697645', 'step': 3899, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:46:06.754706', 'step': 3899, 'epoch': 2} +{'type': 'loss', 'content': 0.00037877244176343083, 'timestamp': '2025-09-10 02:46:06.760997', 'step': 3900, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:46:06.813673', 'step': 3900, 'epoch': 2} +{'type': 'loss', 'content': 0.00015884646563790739, 'timestamp': '2025-09-10 02:46:06.820306', 'step': 3901, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:46:06.882060', 'step': 3901, 'epoch': 2} +{'type': 'loss', 'content': 0.00897427648305893, 'timestamp': '2025-09-10 02:46:06.892777', 'step': 3902, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:46:06.950779', 'step': 3902, 'epoch': 2} +{'type': 'loss', 'content': 0.0014491062611341476, 'timestamp': '2025-09-10 02:46:06.953778', 'step': 3903, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:46:07.008465', 'step': 3903, 'epoch': 2} +{'type': 'loss', 'content': 0.0001243895385414362, 'timestamp': '2025-09-10 02:46:07.014663', 'step': 3904, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:46:07.067582', 'step': 3904, 'epoch': 2} +{'type': 'loss', 'content': 0.001384457340463996, 'timestamp': '2025-09-10 02:46:07.074292', 'step': 3905, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:46:07.135019', 'step': 3905, 'epoch': 2} +{'type': 'loss', 'content': 0.0008130766800604761, 'timestamp': '2025-09-10 02:46:07.137902', 'step': 3906, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:46:07.196259', 'step': 3906, 'epoch': 2} +{'type': 'loss', 'content': 0.0007746769115328789, 'timestamp': '2025-09-10 02:46:07.199364', 'step': 3907, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:46:07.253549', 'step': 3907, 'epoch': 2} +{'type': 'loss', 'content': 0.0008873450569808483, 'timestamp': '2025-09-10 02:46:07.262485', 'step': 3908, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:46:07.318967', 'step': 3908, 'epoch': 2} +{'type': 'loss', 'content': 0.002876777434721589, 'timestamp': '2025-09-10 02:46:07.321661', 'step': 3909, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:46:07.377624', 'step': 3909, 'epoch': 2} +{'type': 'loss', 'content': 0.03569520264863968, 'timestamp': '2025-09-10 02:46:07.380450', 'step': 3910, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:46:07.435323', 'step': 3910, 'epoch': 2} +{'type': 'loss', 'content': 0.0016105091199278831, 'timestamp': '2025-09-10 02:46:07.438403', 'step': 3911, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:46:07.496230', 'step': 3911, 'epoch': 2} +{'type': 'loss', 'content': 0.0023835187312215567, 'timestamp': '2025-09-10 02:46:07.502827', 'step': 3912, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:46:07.557289', 'step': 3912, 'epoch': 2} +{'type': 'loss', 'content': 0.006838030181825161, 'timestamp': '2025-09-10 02:46:07.567784', 'step': 3913, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:46:07.622185', 'step': 3913, 'epoch': 2} +{'type': 'loss', 'content': 0.01887880079448223, 'timestamp': '2025-09-10 02:46:07.624804', 'step': 3914, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:46:07.679652', 'step': 3914, 'epoch': 2} +{'type': 'loss', 'content': 0.010709211230278015, 'timestamp': '2025-09-10 02:46:07.682351', 'step': 3915, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:46:07.735658', 'step': 3915, 'epoch': 2} +{'type': 'loss', 'content': 0.03275291249155998, 'timestamp': '2025-09-10 02:46:07.742387', 'step': 3916, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:46:07.799570', 'step': 3916, 'epoch': 2} +{'type': 'loss', 'content': 0.0008088816539384425, 'timestamp': '2025-09-10 02:46:07.801907', 'step': 3917, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:46:07.856492', 'step': 3917, 'epoch': 2} +{'type': 'loss', 'content': 0.0006239629001356661, 'timestamp': '2025-09-10 02:46:07.866086', 'step': 3918, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:46:07.921749', 'step': 3918, 'epoch': 2} +{'type': 'loss', 'content': 0.00020132127974648029, 'timestamp': '2025-09-10 02:46:07.931504', 'step': 3919, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:46:07.988521', 'step': 3919, 'epoch': 2} +{'type': 'loss', 'content': 0.002257634187117219, 'timestamp': '2025-09-10 02:46:07.995272', 'step': 3920, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:46:08.049662', 'step': 3920, 'epoch': 2} +{'type': 'loss', 'content': 0.016731226816773415, 'timestamp': '2025-09-10 02:46:08.054198', 'step': 3921, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:46:08.111044', 'step': 3921, 'epoch': 2} +{'type': 'loss', 'content': 0.0003627836413215846, 'timestamp': '2025-09-10 02:46:08.114466', 'step': 3922, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:46:08.167932', 'step': 3922, 'epoch': 2} +{'type': 'loss', 'content': 0.004804384894669056, 'timestamp': '2025-09-10 02:46:08.170479', 'step': 3923, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:46:08.226652', 'step': 3923, 'epoch': 2} +{'type': 'loss', 'content': 0.010894840583205223, 'timestamp': '2025-09-10 02:46:08.235706', 'step': 3924, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:46:08.294378', 'step': 3924, 'epoch': 2} +{'type': 'loss', 'content': 0.04266980290412903, 'timestamp': '2025-09-10 02:46:08.298076', 'step': 3925, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:46:08.358574', 'step': 3925, 'epoch': 2} +{'type': 'loss', 'content': 0.0008919899119064212, 'timestamp': '2025-09-10 02:46:08.369025', 'step': 3926, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:46:08.423061', 'step': 3926, 'epoch': 2} +{'type': 'loss', 'content': 0.0006377916433848441, 'timestamp': '2025-09-10 02:46:08.425213', 'step': 3927, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:46:08.478773', 'step': 3927, 'epoch': 2} +{'type': 'loss', 'content': 0.008751966990530491, 'timestamp': '2025-09-10 02:46:08.484776', 'step': 3928, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:46:08.543298', 'step': 3928, 'epoch': 2} +{'type': 'loss', 'content': 0.0013560999650508165, 'timestamp': '2025-09-10 02:46:08.554885', 'step': 3929, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:46:08.608429', 'step': 3929, 'epoch': 2} +{'type': 'loss', 'content': 0.005471502896398306, 'timestamp': '2025-09-10 02:46:08.618058', 'step': 3930, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:46:08.671270', 'step': 3930, 'epoch': 2} +{'type': 'loss', 'content': 0.0013103344244882464, 'timestamp': '2025-09-10 02:46:08.673895', 'step': 3931, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:46:08.726901', 'step': 3931, 'epoch': 2} +{'type': 'loss', 'content': 0.0003612770524341613, 'timestamp': '2025-09-10 02:46:08.733063', 'step': 3932, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:46:08.786058', 'step': 3932, 'epoch': 2} +{'type': 'loss', 'content': 0.000807850738056004, 'timestamp': '2025-09-10 02:46:08.788140', 'step': 3933, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:46:08.841313', 'step': 3933, 'epoch': 2} +{'type': 'loss', 'content': 0.022914016619324684, 'timestamp': '2025-09-10 02:46:08.843692', 'step': 3934, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:46:08.897066', 'step': 3934, 'epoch': 2} +{'type': 'loss', 'content': 0.0017113996436819434, 'timestamp': '2025-09-10 02:46:08.899401', 'step': 3935, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:46:08.951954', 'step': 3935, 'epoch': 2} +{'type': 'loss', 'content': 0.0004442400822881609, 'timestamp': '2025-09-10 02:46:08.958020', 'step': 3936, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:46:09.010532', 'step': 3936, 'epoch': 2} +{'type': 'loss', 'content': 0.005631075706332922, 'timestamp': '2025-09-10 02:46:09.012831', 'step': 3937, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:46:09.065523', 'step': 3937, 'epoch': 2} +{'type': 'loss', 'content': 0.012641689740121365, 'timestamp': '2025-09-10 02:46:09.067892', 'step': 3938, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:46:09.120651', 'step': 3938, 'epoch': 2} +{'type': 'loss', 'content': 3.472653406788595e-05, 'timestamp': '2025-09-10 02:46:09.122780', 'step': 3939, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:46:09.176334', 'step': 3939, 'epoch': 2} +{'type': 'loss', 'content': 0.003270581131801009, 'timestamp': '2025-09-10 02:46:09.186744', 'step': 3940, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:46:09.239169', 'step': 3940, 'epoch': 2} +{'type': 'loss', 'content': 0.0017104193102568388, 'timestamp': '2025-09-10 02:46:09.241235', 'step': 3941, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:46:09.293931', 'step': 3941, 'epoch': 2} +{'type': 'loss', 'content': 0.012531804852187634, 'timestamp': '2025-09-10 02:46:09.302176', 'step': 3942, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:46:09.356597', 'step': 3942, 'epoch': 2} +{'type': 'loss', 'content': 0.0021646064706146717, 'timestamp': '2025-09-10 02:46:09.366332', 'step': 3943, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:46:09.419071', 'step': 3943, 'epoch': 2} +{'type': 'loss', 'content': 0.0002670682442840189, 'timestamp': '2025-09-10 02:46:09.424801', 'step': 3944, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:46:09.477180', 'step': 3944, 'epoch': 2} +{'type': 'loss', 'content': 0.020014271140098572, 'timestamp': '2025-09-10 02:46:09.483903', 'step': 3945, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:46:09.537785', 'step': 3945, 'epoch': 2} +{'type': 'loss', 'content': 2.7008974939235486e-05, 'timestamp': '2025-09-10 02:46:09.540215', 'step': 3946, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:46:09.593962', 'step': 3946, 'epoch': 2} +{'type': 'loss', 'content': 0.0025508219841867685, 'timestamp': '2025-09-10 02:46:09.601521', 'step': 3947, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:46:09.655467', 'step': 3947, 'epoch': 2} +{'type': 'loss', 'content': 0.004302311688661575, 'timestamp': '2025-09-10 02:46:09.662526', 'step': 3948, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:46:09.715074', 'step': 3948, 'epoch': 2} +{'type': 'loss', 'content': 0.012611711397767067, 'timestamp': '2025-09-10 02:46:09.717917', 'step': 3949, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:09.770226', 'step': 3949, 'epoch': 2} +{'type': 'loss', 'content': 0.00032069970620796084, 'timestamp': '2025-09-10 02:46:09.772337', 'step': 3950, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:46:09.825996', 'step': 3950, 'epoch': 2} +{'type': 'loss', 'content': 0.0009283372201025486, 'timestamp': '2025-09-10 02:46:09.835625', 'step': 3951, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:46:09.888586', 'step': 3951, 'epoch': 2} +{'type': 'loss', 'content': 0.019986514002084732, 'timestamp': '2025-09-10 02:46:09.894271', 'step': 3952, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:46:09.946680', 'step': 3952, 'epoch': 2} +{'type': 'loss', 'content': 0.010985749773681164, 'timestamp': '2025-09-10 02:46:09.948734', 'step': 3953, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:46:10.001657', 'step': 3953, 'epoch': 2} +{'type': 'loss', 'content': 0.00025457629817537963, 'timestamp': '2025-09-10 02:46:10.008320', 'step': 3954, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:46:10.061189', 'step': 3954, 'epoch': 2} +{'type': 'loss', 'content': 0.00038253565435297787, 'timestamp': '2025-09-10 02:46:10.067859', 'step': 3955, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:46:10.121046', 'step': 3955, 'epoch': 2} +{'type': 'loss', 'content': 9.885052713798359e-05, 'timestamp': '2025-09-10 02:46:10.126718', 'step': 3956, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:46:10.182870', 'step': 3956, 'epoch': 2} +{'type': 'loss', 'content': 0.0003262778918724507, 'timestamp': '2025-09-10 02:46:10.194036', 'step': 3957, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:46:10.247119', 'step': 3957, 'epoch': 2} +{'type': 'loss', 'content': 0.039545975625514984, 'timestamp': '2025-09-10 02:46:10.249114', 'step': 3958, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:46:10.302706', 'step': 3958, 'epoch': 2} +{'type': 'loss', 'content': 0.037021856755018234, 'timestamp': '2025-09-10 02:46:10.312005', 'step': 3959, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:46:10.365560', 'step': 3959, 'epoch': 2} +{'type': 'loss', 'content': 0.0010134039912372828, 'timestamp': '2025-09-10 02:46:10.371700', 'step': 3960, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:46:10.425243', 'step': 3960, 'epoch': 2} +{'type': 'loss', 'content': 0.001198066514916718, 'timestamp': '2025-09-10 02:46:10.435761', 'step': 3961, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:46:10.488616', 'step': 3961, 'epoch': 2} +{'type': 'loss', 'content': 0.00010575448686722666, 'timestamp': '2025-09-10 02:46:10.490784', 'step': 3962, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:46:10.543843', 'step': 3962, 'epoch': 2} +{'type': 'loss', 'content': 0.0008649599039927125, 'timestamp': '2025-09-10 02:46:10.545896', 'step': 3963, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:46:10.598553', 'step': 3963, 'epoch': 2} +{'type': 'loss', 'content': 0.014276156201958656, 'timestamp': '2025-09-10 02:46:10.604380', 'step': 3964, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:46:10.656623', 'step': 3964, 'epoch': 2} +{'type': 'loss', 'content': 0.0019015094731003046, 'timestamp': '2025-09-10 02:46:10.658607', 'step': 3965, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:46:10.710982', 'step': 3965, 'epoch': 2} +{'type': 'loss', 'content': 0.028332578018307686, 'timestamp': '2025-09-10 02:46:10.713059', 'step': 3966, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:46:10.765377', 'step': 3966, 'epoch': 2} +{'type': 'loss', 'content': 0.004202342126518488, 'timestamp': '2025-09-10 02:46:10.767530', 'step': 3967, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:46:10.820455', 'step': 3967, 'epoch': 2} +{'type': 'loss', 'content': 0.0185786634683609, 'timestamp': '2025-09-10 02:46:10.826149', 'step': 3968, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:46:10.878803', 'step': 3968, 'epoch': 2} +{'type': 'loss', 'content': 0.002318680752068758, 'timestamp': '2025-09-10 02:46:10.880842', 'step': 3969, 'epoch': 2} +{'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 736], 'batch_size': 8, 'flops': 14691612894976}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10859018244352}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 624], 'batch_size': 8, 'flops': 12455932682112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11497784019456}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8623338031488}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8623338031488}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10859018244352}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 656], 'batch_size': 8, 'flops': 13094698457216}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 560], 'batch_size': 8, 'flops': 11178401131904}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11497784019456}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 1168], 'batch_size': 8, 'flops': 23314950858880}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 640], 'batch_size': 8, 'flops': 12775315569664}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [6, 208], 'batch_size': 8, 'flops': 4151977605760}], 'timestamp': '2025-09-10 02:46:27.780620', 'step': 3969, 'epoch': 2} +{'type': 'pplx', 'content': 27331700.28547191, 'timestamp': '2025-09-10 02:46:27.783394', 'step': 3969, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:46:27.842785', 'step': 3969, 'epoch': 2} +{'type': 'loss', 'content': 0.0005022897967137396, 'timestamp': '2025-09-10 02:46:27.853452', 'step': 3970, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-09-10 02:46:27.922791', 'step': 3970, 'epoch': 2} +{'type': 'loss', 'content': 0.0065833283588290215, 'timestamp': '2025-09-10 02:46:27.935283', 'step': 3971, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:46:27.989471', 'step': 3971, 'epoch': 2} +{'type': 'loss', 'content': 0.01841031201183796, 'timestamp': '2025-09-10 02:46:27.995776', 'step': 3972, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:46:28.048495', 'step': 3972, 'epoch': 2} +{'type': 'loss', 'content': 0.009554126299917698, 'timestamp': '2025-09-10 02:46:28.055010', 'step': 3973, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:46:28.109030', 'step': 3973, 'epoch': 2} +{'type': 'loss', 'content': 0.001987367169931531, 'timestamp': '2025-09-10 02:46:28.111240', 'step': 3974, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:46:28.164887', 'step': 3974, 'epoch': 2} +{'type': 'loss', 'content': 0.00036607650690712035, 'timestamp': '2025-09-10 02:46:28.167257', 'step': 3975, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:46:28.221557', 'step': 3975, 'epoch': 2} +{'type': 'loss', 'content': 5.28004347870592e-05, 'timestamp': '2025-09-10 02:46:28.231965', 'step': 3976, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:46:28.285192', 'step': 3976, 'epoch': 2} +{'type': 'loss', 'content': 0.0037857815623283386, 'timestamp': '2025-09-10 02:46:28.287599', 'step': 3977, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:46:28.340544', 'step': 3977, 'epoch': 2} +{'type': 'loss', 'content': 0.025487679988145828, 'timestamp': '2025-09-10 02:46:28.343551', 'step': 3978, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:46:28.398428', 'step': 3978, 'epoch': 2} +{'type': 'loss', 'content': 0.012377269566059113, 'timestamp': '2025-09-10 02:46:28.408282', 'step': 3979, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:46:28.461600', 'step': 3979, 'epoch': 2} +{'type': 'loss', 'content': 0.01761629618704319, 'timestamp': '2025-09-10 02:46:28.467446', 'step': 3980, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:46:28.519647', 'step': 3980, 'epoch': 2} +{'type': 'loss', 'content': 0.007856716401875019, 'timestamp': '2025-09-10 02:46:28.522670', 'step': 3981, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:46:28.575545', 'step': 3981, 'epoch': 2} +{'type': 'loss', 'content': 0.006854567211121321, 'timestamp': '2025-09-10 02:46:28.577654', 'step': 3982, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:46:28.631410', 'step': 3982, 'epoch': 2} +{'type': 'loss', 'content': 0.016330325976014137, 'timestamp': '2025-09-10 02:46:28.641058', 'step': 3983, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:46:28.694502', 'step': 3983, 'epoch': 2} +{'type': 'loss', 'content': 0.0017446457641199231, 'timestamp': '2025-09-10 02:46:28.700113', 'step': 3984, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:46:28.752353', 'step': 3984, 'epoch': 2} +{'type': 'loss', 'content': 0.015398569405078888, 'timestamp': '2025-09-10 02:46:28.755123', 'step': 3985, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:46:28.808978', 'step': 3985, 'epoch': 2} +{'type': 'loss', 'content': 0.000944934319704771, 'timestamp': '2025-09-10 02:46:28.811453', 'step': 3986, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:46:28.865486', 'step': 3986, 'epoch': 2} +{'type': 'loss', 'content': 0.016267871484160423, 'timestamp': '2025-09-10 02:46:28.871815', 'step': 3987, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:46:28.925285', 'step': 3987, 'epoch': 2} +{'type': 'loss', 'content': 0.0010377265280112624, 'timestamp': '2025-09-10 02:46:28.930982', 'step': 3988, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:46:28.983954', 'step': 3988, 'epoch': 2} +{'type': 'loss', 'content': 0.006066413130611181, 'timestamp': '2025-09-10 02:46:28.990446', 'step': 3989, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:46:29.043772', 'step': 3989, 'epoch': 2} +{'type': 'loss', 'content': 0.02377425506711006, 'timestamp': '2025-09-10 02:46:29.046013', 'step': 3990, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:46:29.098777', 'step': 3990, 'epoch': 2} +{'type': 'loss', 'content': 0.004776769317686558, 'timestamp': '2025-09-10 02:46:29.105285', 'step': 3991, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:46:29.158719', 'step': 3991, 'epoch': 2} +{'type': 'loss', 'content': 0.007428622338920832, 'timestamp': '2025-09-10 02:46:29.164756', 'step': 3992, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:46:29.216945', 'step': 3992, 'epoch': 2} +{'type': 'loss', 'content': 0.0014080842956900597, 'timestamp': '2025-09-10 02:46:29.219189', 'step': 3993, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:46:29.272226', 'step': 3993, 'epoch': 2} +{'type': 'loss', 'content': 0.0012848807964473963, 'timestamp': '2025-09-10 02:46:29.275274', 'step': 3994, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:46:29.328492', 'step': 3994, 'epoch': 2} +{'type': 'loss', 'content': 0.003719982458278537, 'timestamp': '2025-09-10 02:46:29.330824', 'step': 3995, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:46:29.383977', 'step': 3995, 'epoch': 2} +{'type': 'loss', 'content': 0.02686316706240177, 'timestamp': '2025-09-10 02:46:29.389726', 'step': 3996, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:46:29.442493', 'step': 3996, 'epoch': 2} +{'type': 'loss', 'content': 0.0009413144434802234, 'timestamp': '2025-09-10 02:46:29.445385', 'step': 3997, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:46:29.498492', 'step': 3997, 'epoch': 2} +{'type': 'loss', 'content': 0.004147673025727272, 'timestamp': '2025-09-10 02:46:29.505188', 'step': 3998, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:46:29.558431', 'step': 3998, 'epoch': 2} +{'type': 'loss', 'content': 0.000486402481328696, 'timestamp': '2025-09-10 02:46:29.560786', 'step': 3999, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:46:29.614659', 'step': 3999, 'epoch': 2} +{'type': 'loss', 'content': 0.0014573262305930257, 'timestamp': '2025-09-10 02:46:29.625056', 'step': 4000, 'epoch': 2} +{'type': 'info', 'content': 'Checkpoint saved at step 4000', 'timestamp': '2025-09-10 02:46:30.127771', 'step': 4000, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:46:30.185801', 'step': 4000, 'epoch': 2} +{'type': 'loss', 'content': 0.0013435414293780923, 'timestamp': '2025-09-10 02:46:30.188086', 'step': 4001, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:46:30.242576', 'step': 4001, 'epoch': 2} +{'type': 'loss', 'content': 0.003639021422713995, 'timestamp': '2025-09-10 02:46:30.244928', 'step': 4002, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:46:30.298024', 'step': 4002, 'epoch': 2} +{'type': 'loss', 'content': 0.00489605451002717, 'timestamp': '2025-09-10 02:46:30.300191', 'step': 4003, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:46:30.353499', 'step': 4003, 'epoch': 2} +{'type': 'loss', 'content': 0.0009414904634468257, 'timestamp': '2025-09-10 02:46:30.359542', 'step': 4004, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:46:30.418567', 'step': 4004, 'epoch': 2} +{'type': 'loss', 'content': 0.004596610087901354, 'timestamp': '2025-09-10 02:46:30.430127', 'step': 4005, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:46:30.483390', 'step': 4005, 'epoch': 2} +{'type': 'loss', 'content': 0.0251252893358469, 'timestamp': '2025-09-10 02:46:30.485432', 'step': 4006, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:46:30.538287', 'step': 4006, 'epoch': 2} +{'type': 'loss', 'content': 0.00520761264488101, 'timestamp': '2025-09-10 02:46:30.541212', 'step': 4007, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:46:30.594180', 'step': 4007, 'epoch': 2} +{'type': 'loss', 'content': 0.005466113798320293, 'timestamp': '2025-09-10 02:46:30.600005', 'step': 4008, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:46:30.652682', 'step': 4008, 'epoch': 2} +{'type': 'loss', 'content': 0.00451264763250947, 'timestamp': '2025-09-10 02:46:30.654938', 'step': 4009, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-09-10 02:46:30.721196', 'step': 4009, 'epoch': 2} +{'type': 'loss', 'content': 0.01480143517255783, 'timestamp': '2025-09-10 02:46:30.733383', 'step': 4010, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:46:30.787204', 'step': 4010, 'epoch': 2} +{'type': 'loss', 'content': 0.0025095022283494473, 'timestamp': '2025-09-10 02:46:30.789308', 'step': 4011, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:46:30.843520', 'step': 4011, 'epoch': 2} +{'type': 'loss', 'content': 0.0075421747751533985, 'timestamp': '2025-09-10 02:46:30.849312', 'step': 4012, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:46:30.902021', 'step': 4012, 'epoch': 2} +{'type': 'loss', 'content': 0.03990047797560692, 'timestamp': '2025-09-10 02:46:30.904301', 'step': 4013, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-09-10 02:46:30.970850', 'step': 4013, 'epoch': 2} +{'type': 'loss', 'content': 0.004559301305562258, 'timestamp': '2025-09-10 02:46:30.983058', 'step': 4014, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 464], 'flops': 9280056402752.0}, 'timestamp': '2025-09-10 02:46:31.055905', 'step': 4014, 'epoch': 2} +{'type': 'loss', 'content': 0.011345594190061092, 'timestamp': '2025-09-10 02:46:31.069336', 'step': 4015, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:46:31.122943', 'step': 4015, 'epoch': 2} +{'type': 'loss', 'content': 0.005087972152978182, 'timestamp': '2025-09-10 02:46:31.128597', 'step': 4016, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:46:31.180958', 'step': 4016, 'epoch': 2} +{'type': 'loss', 'content': 0.0014413511380553246, 'timestamp': '2025-09-10 02:46:31.182951', 'step': 4017, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:46:31.237881', 'step': 4017, 'epoch': 2} +{'type': 'loss', 'content': 0.00031759761623106897, 'timestamp': '2025-09-10 02:46:31.247690', 'step': 4018, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:46:31.303449', 'step': 4018, 'epoch': 2} +{'type': 'loss', 'content': 0.011032961308956146, 'timestamp': '2025-09-10 02:46:31.313206', 'step': 4019, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 496], 'flops': 9920060287936.0}, 'timestamp': '2025-09-10 02:46:31.388472', 'step': 4019, 'epoch': 2} +{'type': 'loss', 'content': 0.04681537672877312, 'timestamp': '2025-09-10 02:46:31.403193', 'step': 4020, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:46:31.456314', 'step': 4020, 'epoch': 2} +{'type': 'loss', 'content': 0.0014196854317560792, 'timestamp': '2025-09-10 02:46:31.466774', 'step': 4021, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:46:31.520003', 'step': 4021, 'epoch': 2} +{'type': 'loss', 'content': 0.007935740984976292, 'timestamp': '2025-09-10 02:46:31.522087', 'step': 4022, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:46:31.574996', 'step': 4022, 'epoch': 2} +{'type': 'loss', 'content': 0.009013201110064983, 'timestamp': '2025-09-10 02:46:31.576865', 'step': 4023, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:46:31.629780', 'step': 4023, 'epoch': 2} +{'type': 'loss', 'content': 0.006031523924320936, 'timestamp': '2025-09-10 02:46:31.635567', 'step': 4024, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:46:31.688163', 'step': 4024, 'epoch': 2} +{'type': 'loss', 'content': 0.005195514764636755, 'timestamp': '2025-09-10 02:46:31.698289', 'step': 4025, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:46:31.752387', 'step': 4025, 'epoch': 2} +{'type': 'loss', 'content': 0.014662462286651134, 'timestamp': '2025-09-10 02:46:31.760428', 'step': 4026, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:46:31.814088', 'step': 4026, 'epoch': 2} +{'type': 'loss', 'content': 0.049105431884527206, 'timestamp': '2025-09-10 02:46:31.816032', 'step': 4027, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:46:31.869151', 'step': 4027, 'epoch': 2} +{'type': 'loss', 'content': 0.004056194331496954, 'timestamp': '2025-09-10 02:46:31.875013', 'step': 4028, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:46:31.927775', 'step': 4028, 'epoch': 2} +{'type': 'loss', 'content': 0.0005449273739941418, 'timestamp': '2025-09-10 02:46:31.929950', 'step': 4029, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:46:31.983142', 'step': 4029, 'epoch': 2} +{'type': 'loss', 'content': 0.009754737839102745, 'timestamp': '2025-09-10 02:46:31.985495', 'step': 4030, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:46:32.038685', 'step': 4030, 'epoch': 2} +{'type': 'loss', 'content': 0.0011415573535487056, 'timestamp': '2025-09-10 02:46:32.044943', 'step': 4031, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:46:32.098081', 'step': 4031, 'epoch': 2} +{'type': 'loss', 'content': 0.00044507390703074634, 'timestamp': '2025-09-10 02:46:32.103788', 'step': 4032, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:46:32.156151', 'step': 4032, 'epoch': 2} +{'type': 'loss', 'content': 0.036054935306310654, 'timestamp': '2025-09-10 02:46:32.166179', 'step': 4033, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:46:32.219260', 'step': 4033, 'epoch': 2} +{'type': 'loss', 'content': 0.011584201827645302, 'timestamp': '2025-09-10 02:46:32.221269', 'step': 4034, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:46:32.273949', 'step': 4034, 'epoch': 2} +{'type': 'loss', 'content': 0.0009119933820329607, 'timestamp': '2025-09-10 02:46:32.276113', 'step': 4035, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:46:32.329356', 'step': 4035, 'epoch': 2} +{'type': 'loss', 'content': 0.0072877854108810425, 'timestamp': '2025-09-10 02:46:32.338247', 'step': 4036, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-09-10 02:46:32.405901', 'step': 4036, 'epoch': 2} +{'type': 'loss', 'content': 0.0037533354479819536, 'timestamp': '2025-09-10 02:46:32.419669', 'step': 4037, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:46:32.473546', 'step': 4037, 'epoch': 2} +{'type': 'loss', 'content': 0.003054562257602811, 'timestamp': '2025-09-10 02:46:32.475790', 'step': 4038, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-09-10 02:46:32.538199', 'step': 4038, 'epoch': 2} +{'type': 'loss', 'content': 0.02401849813759327, 'timestamp': '2025-09-10 02:46:32.549304', 'step': 4039, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:46:32.602707', 'step': 4039, 'epoch': 2} +{'type': 'loss', 'content': 0.0076024653390049934, 'timestamp': '2025-09-10 02:46:32.608645', 'step': 4040, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-09-10 02:46:32.669052', 'step': 4040, 'epoch': 2} +{'type': 'loss', 'content': 0.0013744918396696448, 'timestamp': '2025-09-10 02:46:32.680813', 'step': 4041, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:46:32.734681', 'step': 4041, 'epoch': 2} +{'type': 'loss', 'content': 0.007665363140404224, 'timestamp': '2025-09-10 02:46:32.737000', 'step': 4042, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:46:32.790469', 'step': 4042, 'epoch': 2} +{'type': 'loss', 'content': 0.0051115090027451515, 'timestamp': '2025-09-10 02:46:32.792911', 'step': 4043, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:46:32.846325', 'step': 4043, 'epoch': 2} +{'type': 'loss', 'content': 0.00018724999972619116, 'timestamp': '2025-09-10 02:46:32.852418', 'step': 4044, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:46:32.908622', 'step': 4044, 'epoch': 2} +{'type': 'loss', 'content': 0.021889707073569298, 'timestamp': '2025-09-10 02:46:32.919738', 'step': 4045, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:46:32.974537', 'step': 4045, 'epoch': 2} +{'type': 'loss', 'content': 0.005109102930873632, 'timestamp': '2025-09-10 02:46:32.983778', 'step': 4046, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:46:33.037981', 'step': 4046, 'epoch': 2} +{'type': 'loss', 'content': 0.0011025074636563659, 'timestamp': '2025-09-10 02:46:33.043490', 'step': 4047, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:46:33.097768', 'step': 4047, 'epoch': 2} +{'type': 'loss', 'content': 0.007205640431493521, 'timestamp': '2025-09-10 02:46:33.104904', 'step': 4048, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:46:33.159819', 'step': 4048, 'epoch': 2} +{'type': 'loss', 'content': 0.029010960832238197, 'timestamp': '2025-09-10 02:46:33.161833', 'step': 4049, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:46:33.215100', 'step': 4049, 'epoch': 2} +{'type': 'loss', 'content': 0.005890341941267252, 'timestamp': '2025-09-10 02:46:33.217970', 'step': 4050, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:46:33.270944', 'step': 4050, 'epoch': 2} +{'type': 'loss', 'content': 0.03647368401288986, 'timestamp': '2025-09-10 02:46:33.273151', 'step': 4051, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:46:33.327253', 'step': 4051, 'epoch': 2} +{'type': 'loss', 'content': 0.0008288768003694713, 'timestamp': '2025-09-10 02:46:33.336119', 'step': 4052, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:46:33.389070', 'step': 4052, 'epoch': 2} +{'type': 'loss', 'content': 0.0015556575963273644, 'timestamp': '2025-09-10 02:46:33.395123', 'step': 4053, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:46:33.449106', 'step': 4053, 'epoch': 2} +{'type': 'loss', 'content': 0.02547958306968212, 'timestamp': '2025-09-10 02:46:33.458694', 'step': 4054, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:46:33.512631', 'step': 4054, 'epoch': 2} +{'type': 'loss', 'content': 0.005097005516290665, 'timestamp': '2025-09-10 02:46:33.515307', 'step': 4055, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:46:33.576271', 'step': 4055, 'epoch': 2} +{'type': 'loss', 'content': 0.005207826849073172, 'timestamp': '2025-09-10 02:46:33.587777', 'step': 4056, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-09-10 02:46:33.653775', 'step': 4056, 'epoch': 2} +{'type': 'loss', 'content': 0.01705167628824711, 'timestamp': '2025-09-10 02:46:33.667429', 'step': 4057, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:46:33.721327', 'step': 4057, 'epoch': 2} +{'type': 'loss', 'content': 0.007770686410367489, 'timestamp': '2025-09-10 02:46:33.723717', 'step': 4058, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:46:33.778526', 'step': 4058, 'epoch': 2} +{'type': 'loss', 'content': 0.009561757557094097, 'timestamp': '2025-09-10 02:46:33.780979', 'step': 4059, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:46:33.839241', 'step': 4059, 'epoch': 2} +{'type': 'loss', 'content': 0.01864950731396675, 'timestamp': '2025-09-10 02:46:33.850473', 'step': 4060, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:46:33.903702', 'step': 4060, 'epoch': 2} +{'type': 'loss', 'content': 0.0028672043699771166, 'timestamp': '2025-09-10 02:46:33.909722', 'step': 4061, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:46:33.963254', 'step': 4061, 'epoch': 2} +{'type': 'loss', 'content': 0.007765418849885464, 'timestamp': '2025-09-10 02:46:33.969365', 'step': 4062, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:46:34.022959', 'step': 4062, 'epoch': 2} +{'type': 'loss', 'content': 0.009162651374936104, 'timestamp': '2025-09-10 02:46:34.024841', 'step': 4063, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:46:34.077548', 'step': 4063, 'epoch': 2} +{'type': 'loss', 'content': 0.011173618957400322, 'timestamp': '2025-09-10 02:46:34.083350', 'step': 4064, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:46:34.136649', 'step': 4064, 'epoch': 2} +{'type': 'loss', 'content': 0.0040217977948486805, 'timestamp': '2025-09-10 02:46:34.147154', 'step': 4065, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:46:34.202270', 'step': 4065, 'epoch': 2} +{'type': 'loss', 'content': 0.0023825322277843952, 'timestamp': '2025-09-10 02:46:34.204341', 'step': 4066, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:46:34.257349', 'step': 4066, 'epoch': 2} +{'type': 'loss', 'content': 0.013289921917021275, 'timestamp': '2025-09-10 02:46:34.263737', 'step': 4067, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:46:34.317145', 'step': 4067, 'epoch': 2} +{'type': 'loss', 'content': 0.04988854378461838, 'timestamp': '2025-09-10 02:46:34.322863', 'step': 4068, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:46:34.376189', 'step': 4068, 'epoch': 2} +{'type': 'loss', 'content': 0.02614821121096611, 'timestamp': '2025-09-10 02:46:34.378449', 'step': 4069, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:46:34.439468', 'step': 4069, 'epoch': 2} +{'type': 'loss', 'content': 0.007697419263422489, 'timestamp': '2025-09-10 02:46:34.450143', 'step': 4070, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:46:34.503939', 'step': 4070, 'epoch': 2} +{'type': 'loss', 'content': 0.014285015873610973, 'timestamp': '2025-09-10 02:46:34.506479', 'step': 4071, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:46:34.559735', 'step': 4071, 'epoch': 2} +{'type': 'loss', 'content': 0.0029927226714789867, 'timestamp': '2025-09-10 02:46:34.565945', 'step': 4072, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:46:34.618351', 'step': 4072, 'epoch': 2} +{'type': 'loss', 'content': 0.009025800041854382, 'timestamp': '2025-09-10 02:46:34.621296', 'step': 4073, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:46:34.674181', 'step': 4073, 'epoch': 2} +{'type': 'loss', 'content': 0.0026111751794815063, 'timestamp': '2025-09-10 02:46:34.675979', 'step': 4074, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 528], 'flops': 10560064173120.0}, 'timestamp': '2025-09-10 02:46:34.756219', 'step': 4074, 'epoch': 2} +{'type': 'loss', 'content': 0.004143944941461086, 'timestamp': '2025-09-10 02:46:34.771333', 'step': 4075, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:46:34.824390', 'step': 4075, 'epoch': 2} +{'type': 'loss', 'content': 0.007681169547140598, 'timestamp': '2025-09-10 02:46:34.830292', 'step': 4076, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:46:34.884006', 'step': 4076, 'epoch': 2} +{'type': 'loss', 'content': 0.003643867326900363, 'timestamp': '2025-09-10 02:46:34.894520', 'step': 4077, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:34.946997', 'step': 4077, 'epoch': 2} +{'type': 'loss', 'content': 0.012861545197665691, 'timestamp': '2025-09-10 02:46:34.949107', 'step': 4078, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:46:35.002434', 'step': 4078, 'epoch': 2} +{'type': 'loss', 'content': 0.002178559545427561, 'timestamp': '2025-09-10 02:46:35.010494', 'step': 4079, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:46:35.064456', 'step': 4079, 'epoch': 2} +{'type': 'loss', 'content': 0.010461727157235146, 'timestamp': '2025-09-10 02:46:35.070400', 'step': 4080, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-09-10 02:46:35.130962', 'step': 4080, 'epoch': 2} +{'type': 'loss', 'content': 0.002877767663449049, 'timestamp': '2025-09-10 02:46:35.142973', 'step': 4081, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:46:35.197186', 'step': 4081, 'epoch': 2} +{'type': 'loss', 'content': 0.01713237166404724, 'timestamp': '2025-09-10 02:46:35.203355', 'step': 4082, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:46:35.259784', 'step': 4082, 'epoch': 2} +{'type': 'loss', 'content': 0.009582663886249065, 'timestamp': '2025-09-10 02:46:35.261889', 'step': 4083, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:46:35.322784', 'step': 4083, 'epoch': 2} +{'type': 'loss', 'content': 0.021030381321907043, 'timestamp': '2025-09-10 02:46:35.334228', 'step': 4084, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:46:35.389030', 'step': 4084, 'epoch': 2} +{'type': 'loss', 'content': 0.040052540600299835, 'timestamp': '2025-09-10 02:46:35.391771', 'step': 4085, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:46:35.446022', 'step': 4085, 'epoch': 2} +{'type': 'loss', 'content': 0.016354167833924294, 'timestamp': '2025-09-10 02:46:35.448986', 'step': 4086, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:46:35.504182', 'step': 4086, 'epoch': 2} +{'type': 'loss', 'content': 0.000843790709041059, 'timestamp': '2025-09-10 02:46:35.509222', 'step': 4087, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:46:35.565620', 'step': 4087, 'epoch': 2} +{'type': 'loss', 'content': 0.003825886407867074, 'timestamp': '2025-09-10 02:46:35.572559', 'step': 4088, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:46:35.630489', 'step': 4088, 'epoch': 2} +{'type': 'loss', 'content': 0.01318382564932108, 'timestamp': '2025-09-10 02:46:35.633135', 'step': 4089, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-09-10 02:46:35.704178', 'step': 4089, 'epoch': 2} +{'type': 'loss', 'content': 0.035097163170576096, 'timestamp': '2025-09-10 02:46:35.716732', 'step': 4090, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:46:35.776278', 'step': 4090, 'epoch': 2} +{'type': 'loss', 'content': 0.01913559064269066, 'timestamp': '2025-09-10 02:46:35.779224', 'step': 4091, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:46:35.837253', 'step': 4091, 'epoch': 2} +{'type': 'loss', 'content': 0.011474421247839928, 'timestamp': '2025-09-10 02:46:35.844165', 'step': 4092, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:46:35.901004', 'step': 4092, 'epoch': 2} +{'type': 'loss', 'content': 0.0012699338840320706, 'timestamp': '2025-09-10 02:46:35.904307', 'step': 4093, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:46:35.965682', 'step': 4093, 'epoch': 2} +{'type': 'loss', 'content': 0.006102017126977444, 'timestamp': '2025-09-10 02:46:35.968842', 'step': 4094, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:46:36.022783', 'step': 4094, 'epoch': 2} +{'type': 'loss', 'content': 0.001885882462374866, 'timestamp': '2025-09-10 02:46:36.025315', 'step': 4095, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:46:36.084608', 'step': 4095, 'epoch': 2} +{'type': 'loss', 'content': 0.010418168269097805, 'timestamp': '2025-09-10 02:46:36.095778', 'step': 4096, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:46:36.150720', 'step': 4096, 'epoch': 2} +{'type': 'loss', 'content': 0.005161702632904053, 'timestamp': '2025-09-10 02:46:36.153588', 'step': 4097, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:46:36.207978', 'step': 4097, 'epoch': 2} +{'type': 'loss', 'content': 0.002099298406392336, 'timestamp': '2025-09-10 02:46:36.211789', 'step': 4098, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:46:36.266671', 'step': 4098, 'epoch': 2} +{'type': 'loss', 'content': 0.0003340943658258766, 'timestamp': '2025-09-10 02:46:36.270009', 'step': 4099, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 608], 'flops': 12160073886080.0}, 'timestamp': '2025-09-10 02:46:36.368248', 'step': 4099, 'epoch': 2} +{'type': 'loss', 'content': 0.009731748141348362, 'timestamp': '2025-09-10 02:46:36.386175', 'step': 4100, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:46:36.446100', 'step': 4100, 'epoch': 2} +{'type': 'loss', 'content': 0.001977026928216219, 'timestamp': '2025-09-10 02:46:36.449830', 'step': 4101, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:46:36.504415', 'step': 4101, 'epoch': 2} +{'type': 'loss', 'content': 0.0008826405392028391, 'timestamp': '2025-09-10 02:46:36.514050', 'step': 4102, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:46:36.570686', 'step': 4102, 'epoch': 2} +{'type': 'loss', 'content': 0.002490654354915023, 'timestamp': '2025-09-10 02:46:36.574451', 'step': 4103, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:46:36.630946', 'step': 4103, 'epoch': 2} +{'type': 'loss', 'content': 0.0013069234555587173, 'timestamp': '2025-09-10 02:46:36.640436', 'step': 4104, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:46:36.697039', 'step': 4104, 'epoch': 2} +{'type': 'loss', 'content': 0.0011374709429219365, 'timestamp': '2025-09-10 02:46:36.699273', 'step': 4105, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:46:36.761621', 'step': 4105, 'epoch': 2} +{'type': 'loss', 'content': 0.007636964786797762, 'timestamp': '2025-09-10 02:46:36.764480', 'step': 4106, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:46:36.819165', 'step': 4106, 'epoch': 2} +{'type': 'loss', 'content': 0.004490118473768234, 'timestamp': '2025-09-10 02:46:36.824059', 'step': 4107, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 448], 'flops': 8960054460160.0}, 'timestamp': '2025-09-10 02:46:36.899554', 'step': 4107, 'epoch': 2} +{'type': 'loss', 'content': 0.006942715495824814, 'timestamp': '2025-09-10 02:46:36.913229', 'step': 4108, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:46:36.975350', 'step': 4108, 'epoch': 2} +{'type': 'loss', 'content': 0.005277864169329405, 'timestamp': '2025-09-10 02:46:36.986954', 'step': 4109, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:46:37.043889', 'step': 4109, 'epoch': 2} +{'type': 'loss', 'content': 0.006958580110222101, 'timestamp': '2025-09-10 02:46:37.049703', 'step': 4110, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:46:37.106522', 'step': 4110, 'epoch': 2} +{'type': 'loss', 'content': 0.008101566694676876, 'timestamp': '2025-09-10 02:46:37.112348', 'step': 4111, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:46:37.168621', 'step': 4111, 'epoch': 2} +{'type': 'loss', 'content': 0.0010885442607104778, 'timestamp': '2025-09-10 02:46:37.174558', 'step': 4112, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:46:37.227355', 'step': 4112, 'epoch': 2} +{'type': 'loss', 'content': 0.012058460153639317, 'timestamp': '2025-09-10 02:46:37.232018', 'step': 4113, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:46:37.291019', 'step': 4113, 'epoch': 2} +{'type': 'loss', 'content': 0.0016954487655311823, 'timestamp': '2025-09-10 02:46:37.293175', 'step': 4114, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:46:37.346320', 'step': 4114, 'epoch': 2} +{'type': 'loss', 'content': 0.0035064031835645437, 'timestamp': '2025-09-10 02:46:37.348902', 'step': 4115, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:46:37.402777', 'step': 4115, 'epoch': 2} +{'type': 'loss', 'content': 0.0027604959905147552, 'timestamp': '2025-09-10 02:46:37.408887', 'step': 4116, 'epoch': 2} +{'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 736], 'batch_size': 8, 'flops': 14691612894976}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10859018244352}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 624], 'batch_size': 8, 'flops': 12455932682112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11497784019456}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8623338031488}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8623338031488}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10859018244352}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 656], 'batch_size': 8, 'flops': 13094698457216}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 560], 'batch_size': 8, 'flops': 11178401131904}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11497784019456}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 1168], 'batch_size': 8, 'flops': 23314950858880}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 640], 'batch_size': 8, 'flops': 12775315569664}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [6, 208], 'batch_size': 8, 'flops': 4151977605760}], 'timestamp': '2025-09-10 02:46:54.393244', 'step': 4116, 'epoch': 2} +{'type': 'pplx', 'content': 27383942.289708607, 'timestamp': '2025-09-10 02:46:54.396230', 'step': 4116, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:46:54.450780', 'step': 4116, 'epoch': 2} +{'type': 'loss', 'content': 0.0010639320826157928, 'timestamp': '2025-09-10 02:46:54.455730', 'step': 4117, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:46:54.510405', 'step': 4117, 'epoch': 2} +{'type': 'loss', 'content': 0.0016920131165534258, 'timestamp': '2025-09-10 02:46:54.512656', 'step': 4118, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:46:54.566590', 'step': 4118, 'epoch': 2} +{'type': 'loss', 'content': 0.001813766430132091, 'timestamp': '2025-09-10 02:46:54.576084', 'step': 4119, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:46:54.630143', 'step': 4119, 'epoch': 2} +{'type': 'loss', 'content': 0.003169593634083867, 'timestamp': '2025-09-10 02:46:54.636314', 'step': 4120, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:46:54.689269', 'step': 4120, 'epoch': 2} +{'type': 'loss', 'content': 0.0005361930816434324, 'timestamp': '2025-09-10 02:46:54.695455', 'step': 4121, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:46:54.749198', 'step': 4121, 'epoch': 2} +{'type': 'loss', 'content': 0.002701799152418971, 'timestamp': '2025-09-10 02:46:54.751203', 'step': 4122, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:46:54.804655', 'step': 4122, 'epoch': 2} +{'type': 'loss', 'content': 0.006143040489405394, 'timestamp': '2025-09-10 02:46:54.806835', 'step': 4123, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:46:54.861923', 'step': 4123, 'epoch': 2} +{'type': 'loss', 'content': 0.006249502766877413, 'timestamp': '2025-09-10 02:46:54.869025', 'step': 4124, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:46:54.921840', 'step': 4124, 'epoch': 2} +{'type': 'loss', 'content': 0.0010239200200885534, 'timestamp': '2025-09-10 02:46:54.924359', 'step': 4125, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:46:54.978852', 'step': 4125, 'epoch': 2} +{'type': 'loss', 'content': 0.00025490522966720164, 'timestamp': '2025-09-10 02:46:54.980877', 'step': 4126, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:46:55.034205', 'step': 4126, 'epoch': 2} +{'type': 'loss', 'content': 0.0008148782653734088, 'timestamp': '2025-09-10 02:46:55.036659', 'step': 4127, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:46:55.091086', 'step': 4127, 'epoch': 2} +{'type': 'loss', 'content': 0.002787953708320856, 'timestamp': '2025-09-10 02:46:55.097150', 'step': 4128, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:46:55.150449', 'step': 4128, 'epoch': 2} +{'type': 'loss', 'content': 0.0013069541892036796, 'timestamp': '2025-09-10 02:46:55.152597', 'step': 4129, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:46:55.206346', 'step': 4129, 'epoch': 2} +{'type': 'loss', 'content': 0.0030835315119475126, 'timestamp': '2025-09-10 02:46:55.215972', 'step': 4130, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:46:55.270221', 'step': 4130, 'epoch': 2} +{'type': 'loss', 'content': 0.00046228888095356524, 'timestamp': '2025-09-10 02:46:55.272482', 'step': 4131, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:46:55.325734', 'step': 4131, 'epoch': 2} +{'type': 'loss', 'content': 0.0010769153013825417, 'timestamp': '2025-09-10 02:46:55.331820', 'step': 4132, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:46:55.384403', 'step': 4132, 'epoch': 2} +{'type': 'loss', 'content': 0.002228983212262392, 'timestamp': '2025-09-10 02:46:55.386409', 'step': 4133, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:46:55.444717', 'step': 4133, 'epoch': 2} +{'type': 'loss', 'content': 0.03251693397760391, 'timestamp': '2025-09-10 02:46:55.455145', 'step': 4134, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:46:55.508651', 'step': 4134, 'epoch': 2} +{'type': 'loss', 'content': 0.0027009884361177683, 'timestamp': '2025-09-10 02:46:55.516714', 'step': 4135, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:46:55.570060', 'step': 4135, 'epoch': 2} +{'type': 'loss', 'content': 0.023150457069277763, 'timestamp': '2025-09-10 02:46:55.575946', 'step': 4136, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:46:55.628654', 'step': 4136, 'epoch': 2} +{'type': 'loss', 'content': 0.02348208613693714, 'timestamp': '2025-09-10 02:46:55.636839', 'step': 4137, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-09-10 02:46:55.699096', 'step': 4137, 'epoch': 2} +{'type': 'loss', 'content': 0.002066678134724498, 'timestamp': '2025-09-10 02:46:55.710167', 'step': 4138, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:46:55.764694', 'step': 4138, 'epoch': 2} +{'type': 'loss', 'content': 0.0007858966710045934, 'timestamp': '2025-09-10 02:46:55.772747', 'step': 4139, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:46:55.826213', 'step': 4139, 'epoch': 2} +{'type': 'loss', 'content': 0.014663800597190857, 'timestamp': '2025-09-10 02:46:55.832208', 'step': 4140, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-09-10 02:46:55.896995', 'step': 4140, 'epoch': 2} +{'type': 'loss', 'content': 0.012350101955235004, 'timestamp': '2025-09-10 02:46:55.910243', 'step': 4141, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:46:55.964131', 'step': 4141, 'epoch': 2} +{'type': 'loss', 'content': 0.019156964495778084, 'timestamp': '2025-09-10 02:46:55.971703', 'step': 4142, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:46:56.026385', 'step': 4142, 'epoch': 2} +{'type': 'loss', 'content': 0.002492027124390006, 'timestamp': '2025-09-10 02:46:56.028674', 'step': 4143, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:46:56.081806', 'step': 4143, 'epoch': 2} +{'type': 'loss', 'content': 0.00240537291392684, 'timestamp': '2025-09-10 02:46:56.087597', 'step': 4144, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:46:56.140412', 'step': 4144, 'epoch': 2} +{'type': 'loss', 'content': 0.010689268819987774, 'timestamp': '2025-09-10 02:46:56.142423', 'step': 4145, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:46:56.195137', 'step': 4145, 'epoch': 2} +{'type': 'loss', 'content': 0.012677262537181377, 'timestamp': '2025-09-10 02:46:56.197306', 'step': 4146, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:46:56.250204', 'step': 4146, 'epoch': 2} +{'type': 'loss', 'content': 0.0003994545841123909, 'timestamp': '2025-09-10 02:46:56.252525', 'step': 4147, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-09-10 02:46:56.318783', 'step': 4147, 'epoch': 2} +{'type': 'loss', 'content': 0.007907947525382042, 'timestamp': '2025-09-10 02:46:56.331815', 'step': 4148, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:46:56.385652', 'step': 4148, 'epoch': 2} +{'type': 'loss', 'content': 0.008695722557604313, 'timestamp': '2025-09-10 02:46:56.390927', 'step': 4149, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-09-10 02:46:56.452800', 'step': 4149, 'epoch': 2} +{'type': 'loss', 'content': 0.016970546916127205, 'timestamp': '2025-09-10 02:46:56.463938', 'step': 4150, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:46:56.517949', 'step': 4150, 'epoch': 2} +{'type': 'loss', 'content': 0.0034423437900841236, 'timestamp': '2025-09-10 02:46:56.520381', 'step': 4151, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:46:56.573888', 'step': 4151, 'epoch': 2} +{'type': 'loss', 'content': 0.0005895387148484588, 'timestamp': '2025-09-10 02:46:56.579990', 'step': 4152, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:46:56.632439', 'step': 4152, 'epoch': 2} +{'type': 'loss', 'content': 0.022216182202100754, 'timestamp': '2025-09-10 02:46:56.634864', 'step': 4153, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:46:56.687792', 'step': 4153, 'epoch': 2} +{'type': 'loss', 'content': 8.276205335278064e-05, 'timestamp': '2025-09-10 02:46:56.689940', 'step': 4154, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:46:56.743660', 'step': 4154, 'epoch': 2} +{'type': 'loss', 'content': 0.0026618402916938066, 'timestamp': '2025-09-10 02:46:56.745700', 'step': 4155, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:46:56.799045', 'step': 4155, 'epoch': 2} +{'type': 'loss', 'content': 0.005132874008268118, 'timestamp': '2025-09-10 02:46:56.805265', 'step': 4156, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:46:56.857797', 'step': 4156, 'epoch': 2} +{'type': 'loss', 'content': 0.02009979449212551, 'timestamp': '2025-09-10 02:46:56.867665', 'step': 4157, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:46:56.925691', 'step': 4157, 'epoch': 2} +{'type': 'loss', 'content': 0.033425040543079376, 'timestamp': '2025-09-10 02:46:56.936102', 'step': 4158, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:46:56.994657', 'step': 4158, 'epoch': 2} +{'type': 'loss', 'content': 0.0018782642437145114, 'timestamp': '2025-09-10 02:46:57.005093', 'step': 4159, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:46:57.058451', 'step': 4159, 'epoch': 2} +{'type': 'loss', 'content': 0.0024663552176207304, 'timestamp': '2025-09-10 02:46:57.064595', 'step': 4160, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:46:57.117960', 'step': 4160, 'epoch': 2} +{'type': 'loss', 'content': 0.009812185540795326, 'timestamp': '2025-09-10 02:46:57.120250', 'step': 4161, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:46:57.173909', 'step': 4161, 'epoch': 2} +{'type': 'loss', 'content': 0.03139030560851097, 'timestamp': '2025-09-10 02:46:57.175910', 'step': 4162, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:46:57.228843', 'step': 4162, 'epoch': 2} +{'type': 'loss', 'content': 0.00015185572556219995, 'timestamp': '2025-09-10 02:46:57.230951', 'step': 4163, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:46:57.289433', 'step': 4163, 'epoch': 2} +{'type': 'loss', 'content': 0.0025247910525649786, 'timestamp': '2025-09-10 02:46:57.300663', 'step': 4164, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:46:57.353790', 'step': 4164, 'epoch': 2} +{'type': 'loss', 'content': 0.00016043234791141003, 'timestamp': '2025-09-10 02:46:57.356040', 'step': 4165, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:46:57.409538', 'step': 4165, 'epoch': 2} +{'type': 'loss', 'content': 0.003010234097018838, 'timestamp': '2025-09-10 02:46:57.417607', 'step': 4166, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:46:57.471450', 'step': 4166, 'epoch': 2} +{'type': 'loss', 'content': 0.02376936562359333, 'timestamp': '2025-09-10 02:46:57.477707', 'step': 4167, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:46:57.530885', 'step': 4167, 'epoch': 2} +{'type': 'loss', 'content': 0.003847723826766014, 'timestamp': '2025-09-10 02:46:57.536752', 'step': 4168, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:46:57.589197', 'step': 4168, 'epoch': 2} +{'type': 'loss', 'content': 0.03598278760910034, 'timestamp': '2025-09-10 02:46:57.599225', 'step': 4169, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:46:57.652783', 'step': 4169, 'epoch': 2} +{'type': 'loss', 'content': 0.0018984022317454219, 'timestamp': '2025-09-10 02:46:57.655038', 'step': 4170, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:46:57.708572', 'step': 4170, 'epoch': 2} +{'type': 'loss', 'content': 0.02804272435605526, 'timestamp': '2025-09-10 02:46:57.714692', 'step': 4171, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:46:57.769096', 'step': 4171, 'epoch': 2} +{'type': 'loss', 'content': 0.00391715532168746, 'timestamp': '2025-09-10 02:46:57.779526', 'step': 4172, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:46:57.832352', 'step': 4172, 'epoch': 2} +{'type': 'loss', 'content': 0.007302652578800917, 'timestamp': '2025-09-10 02:46:57.834309', 'step': 4173, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:46:57.894919', 'step': 4173, 'epoch': 2} +{'type': 'loss', 'content': 0.010661360807716846, 'timestamp': '2025-09-10 02:46:57.905701', 'step': 4174, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:46:57.959295', 'step': 4174, 'epoch': 2} +{'type': 'loss', 'content': 0.011691556312143803, 'timestamp': '2025-09-10 02:46:57.965609', 'step': 4175, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:46:58.020089', 'step': 4175, 'epoch': 2} +{'type': 'loss', 'content': 0.04713850095868111, 'timestamp': '2025-09-10 02:46:58.026024', 'step': 4176, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:46:58.078403', 'step': 4176, 'epoch': 2} +{'type': 'loss', 'content': 0.016401372849941254, 'timestamp': '2025-09-10 02:46:58.086520', 'step': 4177, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:46:58.139539', 'step': 4177, 'epoch': 2} +{'type': 'loss', 'content': 0.0005184911424294114, 'timestamp': '2025-09-10 02:46:58.141570', 'step': 4178, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-09-10 02:46:58.204505', 'step': 4178, 'epoch': 2} +{'type': 'loss', 'content': 0.0002902874839492142, 'timestamp': '2025-09-10 02:46:58.215648', 'step': 4179, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:46:58.270556', 'step': 4179, 'epoch': 2} +{'type': 'loss', 'content': 0.01087592076510191, 'timestamp': '2025-09-10 02:46:58.276426', 'step': 4180, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:46:58.329790', 'step': 4180, 'epoch': 2} +{'type': 'loss', 'content': 0.009179359301924706, 'timestamp': '2025-09-10 02:46:58.340307', 'step': 4181, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:46:58.394700', 'step': 4181, 'epoch': 2} +{'type': 'loss', 'content': 0.00733901048079133, 'timestamp': '2025-09-10 02:46:58.396959', 'step': 4182, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:46:58.451043', 'step': 4182, 'epoch': 2} +{'type': 'loss', 'content': 0.03791997209191322, 'timestamp': '2025-09-10 02:46:58.453943', 'step': 4183, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:46:58.507245', 'step': 4183, 'epoch': 2} +{'type': 'loss', 'content': 0.00893563125282526, 'timestamp': '2025-09-10 02:46:58.513109', 'step': 4184, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:46:58.566286', 'step': 4184, 'epoch': 2} +{'type': 'loss', 'content': 0.01717417687177658, 'timestamp': '2025-09-10 02:46:58.568614', 'step': 4185, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:46:58.621338', 'step': 4185, 'epoch': 2} +{'type': 'loss', 'content': 0.010687016882002354, 'timestamp': '2025-09-10 02:46:58.623769', 'step': 4186, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:46:58.677623', 'step': 4186, 'epoch': 2} +{'type': 'loss', 'content': 0.0004435994487721473, 'timestamp': '2025-09-10 02:46:58.684014', 'step': 4187, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:46:58.737901', 'step': 4187, 'epoch': 2} +{'type': 'loss', 'content': 0.0002909741015173495, 'timestamp': '2025-09-10 02:46:58.743750', 'step': 4188, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:46:58.797016', 'step': 4188, 'epoch': 2} +{'type': 'loss', 'content': 0.009057112038135529, 'timestamp': '2025-09-10 02:46:58.799190', 'step': 4189, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:46:58.852212', 'step': 4189, 'epoch': 2} +{'type': 'loss', 'content': 0.0025562902446836233, 'timestamp': '2025-09-10 02:46:58.854540', 'step': 4190, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:46:58.907910', 'step': 4190, 'epoch': 2} +{'type': 'loss', 'content': 0.012630063109099865, 'timestamp': '2025-09-10 02:46:58.910692', 'step': 4191, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:46:58.964011', 'step': 4191, 'epoch': 2} +{'type': 'loss', 'content': 0.0003869440406560898, 'timestamp': '2025-09-10 02:46:58.970000', 'step': 4192, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:46:59.022327', 'step': 4192, 'epoch': 2} +{'type': 'loss', 'content': 0.08045341074466705, 'timestamp': '2025-09-10 02:46:59.028834', 'step': 4193, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:46:59.082530', 'step': 4193, 'epoch': 2} +{'type': 'loss', 'content': 0.021230129525065422, 'timestamp': '2025-09-10 02:46:59.084972', 'step': 4194, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:46:59.139230', 'step': 4194, 'epoch': 2} +{'type': 'loss', 'content': 0.0014136239187791944, 'timestamp': '2025-09-10 02:46:59.148859', 'step': 4195, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:46:59.202393', 'step': 4195, 'epoch': 2} +{'type': 'loss', 'content': 0.02922656573355198, 'timestamp': '2025-09-10 02:46:59.208201', 'step': 4196, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:46:59.260960', 'step': 4196, 'epoch': 2} +{'type': 'loss', 'content': 0.0003514822747092694, 'timestamp': '2025-09-10 02:46:59.270809', 'step': 4197, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:46:59.324391', 'step': 4197, 'epoch': 2} +{'type': 'loss', 'content': 0.013680154457688332, 'timestamp': '2025-09-10 02:46:59.326703', 'step': 4198, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:46:59.380555', 'step': 4198, 'epoch': 2} +{'type': 'loss', 'content': 0.0015003107255324721, 'timestamp': '2025-09-10 02:46:59.383025', 'step': 4199, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:46:59.436074', 'step': 4199, 'epoch': 2} +{'type': 'loss', 'content': 0.009432598948478699, 'timestamp': '2025-09-10 02:46:59.442151', 'step': 4200, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:46:59.494609', 'step': 4200, 'epoch': 2} +{'type': 'loss', 'content': 0.01662013866007328, 'timestamp': '2025-09-10 02:46:59.497564', 'step': 4201, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:46:59.549887', 'step': 4201, 'epoch': 2} +{'type': 'loss', 'content': 0.0062576900236308575, 'timestamp': '2025-09-10 02:46:59.551929', 'step': 4202, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:46:59.604580', 'step': 4202, 'epoch': 2} +{'type': 'loss', 'content': 0.00976634118705988, 'timestamp': '2025-09-10 02:46:59.607532', 'step': 4203, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:46:59.660778', 'step': 4203, 'epoch': 2} +{'type': 'loss', 'content': 0.00025636356440372765, 'timestamp': '2025-09-10 02:46:59.669799', 'step': 4204, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:46:59.728914', 'step': 4204, 'epoch': 2} +{'type': 'loss', 'content': 0.000732213375158608, 'timestamp': '2025-09-10 02:46:59.740506', 'step': 4205, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:46:59.793887', 'step': 4205, 'epoch': 2} +{'type': 'loss', 'content': 0.016269633546471596, 'timestamp': '2025-09-10 02:46:59.796173', 'step': 4206, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:46:59.849798', 'step': 4206, 'epoch': 2} +{'type': 'loss', 'content': 0.022343024611473083, 'timestamp': '2025-09-10 02:46:59.852200', 'step': 4207, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:46:59.905285', 'step': 4207, 'epoch': 2} +{'type': 'loss', 'content': 0.0001984843984246254, 'timestamp': '2025-09-10 02:46:59.911266', 'step': 4208, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-09-10 02:46:59.977933', 'step': 4208, 'epoch': 2} +{'type': 'loss', 'content': 0.005612724460661411, 'timestamp': '2025-09-10 02:46:59.991515', 'step': 4209, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 848], 'flops': 16960103024960.0}, 'timestamp': '2025-09-10 02:47:00.114987', 'step': 4209, 'epoch': 2} +{'type': 'loss', 'content': 0.0010717118857428432, 'timestamp': '2025-09-10 02:47:00.139058', 'step': 4210, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:47:00.194483', 'step': 4210, 'epoch': 2} +{'type': 'loss', 'content': 0.001120222732424736, 'timestamp': '2025-09-10 02:47:00.204296', 'step': 4211, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:47:00.258072', 'step': 4211, 'epoch': 2} +{'type': 'loss', 'content': 0.006723283790051937, 'timestamp': '2025-09-10 02:47:00.264003', 'step': 4212, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:47:00.319356', 'step': 4212, 'epoch': 2} +{'type': 'loss', 'content': 0.008961746469140053, 'timestamp': '2025-09-10 02:47:00.324288', 'step': 4213, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:47:00.378142', 'step': 4213, 'epoch': 2} +{'type': 'loss', 'content': 0.0066358777694404125, 'timestamp': '2025-09-10 02:47:00.384564', 'step': 4214, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:47:00.438284', 'step': 4214, 'epoch': 2} +{'type': 'loss', 'content': 0.009775841608643532, 'timestamp': '2025-09-10 02:47:00.440823', 'step': 4215, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:47:00.500905', 'step': 4215, 'epoch': 2} +{'type': 'loss', 'content': 0.016747374087572098, 'timestamp': '2025-09-10 02:47:00.512369', 'step': 4216, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-09-10 02:47:00.572725', 'step': 4216, 'epoch': 2} +{'type': 'loss', 'content': 0.008201695047318935, 'timestamp': '2025-09-10 02:47:00.584670', 'step': 4217, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:47:00.638359', 'step': 4217, 'epoch': 2} +{'type': 'loss', 'content': 0.008140222169458866, 'timestamp': '2025-09-10 02:47:00.640589', 'step': 4218, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 512], 'flops': 10240062230528.0}, 'timestamp': '2025-09-10 02:47:00.716208', 'step': 4218, 'epoch': 2} +{'type': 'loss', 'content': 0.012579244561493397, 'timestamp': '2025-09-10 02:47:00.730272', 'step': 4219, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-09-10 02:47:00.800496', 'step': 4219, 'epoch': 2} +{'type': 'loss', 'content': 0.0007998248329386115, 'timestamp': '2025-09-10 02:47:00.813997', 'step': 4220, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:47:00.868888', 'step': 4220, 'epoch': 2} +{'type': 'loss', 'content': 0.006177857052534819, 'timestamp': '2025-09-10 02:47:00.871385', 'step': 4221, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:47:00.926100', 'step': 4221, 'epoch': 2} +{'type': 'loss', 'content': 0.011896011419594288, 'timestamp': '2025-09-10 02:47:00.935902', 'step': 4222, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:47:00.989884', 'step': 4222, 'epoch': 2} +{'type': 'loss', 'content': 0.004865396302193403, 'timestamp': '2025-09-10 02:47:00.992633', 'step': 4223, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:47:01.045579', 'step': 4223, 'epoch': 2} +{'type': 'loss', 'content': 0.0060439822264015675, 'timestamp': '2025-09-10 02:47:01.054520', 'step': 4224, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:47:01.106900', 'step': 4224, 'epoch': 2} +{'type': 'loss', 'content': 0.0036904923617839813, 'timestamp': '2025-09-10 02:47:01.114935', 'step': 4225, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:47:01.168558', 'step': 4225, 'epoch': 2} +{'type': 'loss', 'content': 0.025875460356473923, 'timestamp': '2025-09-10 02:47:01.170725', 'step': 4226, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:47:01.229380', 'step': 4226, 'epoch': 2} +{'type': 'loss', 'content': 0.0035130020696669817, 'timestamp': '2025-09-10 02:47:01.239838', 'step': 4227, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:47:01.293331', 'step': 4227, 'epoch': 2} +{'type': 'loss', 'content': 0.005314098205417395, 'timestamp': '2025-09-10 02:47:01.299402', 'step': 4228, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:47:01.353494', 'step': 4228, 'epoch': 2} +{'type': 'loss', 'content': 0.015061727724969387, 'timestamp': '2025-09-10 02:47:01.358896', 'step': 4229, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:47:01.413507', 'step': 4229, 'epoch': 2} +{'type': 'loss', 'content': 0.005133180413395166, 'timestamp': '2025-09-10 02:47:01.415973', 'step': 4230, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:47:01.469253', 'step': 4230, 'epoch': 2} +{'type': 'loss', 'content': 0.004526397679001093, 'timestamp': '2025-09-10 02:47:01.471532', 'step': 4231, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-09-10 02:47:01.537907', 'step': 4231, 'epoch': 2} +{'type': 'loss', 'content': 0.0045868405140936375, 'timestamp': '2025-09-10 02:47:01.550884', 'step': 4232, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:47:01.603546', 'step': 4232, 'epoch': 2} +{'type': 'loss', 'content': 0.002852875739336014, 'timestamp': '2025-09-10 02:47:01.605456', 'step': 4233, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:47:01.659974', 'step': 4233, 'epoch': 2} +{'type': 'loss', 'content': 0.010279986076056957, 'timestamp': '2025-09-10 02:47:01.669781', 'step': 4234, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:47:01.722852', 'step': 4234, 'epoch': 2} +{'type': 'loss', 'content': 0.023315981030464172, 'timestamp': '2025-09-10 02:47:01.729435', 'step': 4235, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:47:01.782635', 'step': 4235, 'epoch': 2} +{'type': 'loss', 'content': 0.011400463059544563, 'timestamp': '2025-09-10 02:47:01.788676', 'step': 4236, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-09-10 02:47:01.853734', 'step': 4236, 'epoch': 2} +{'type': 'loss', 'content': 0.0022969634737819433, 'timestamp': '2025-09-10 02:47:01.866876', 'step': 4237, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:47:01.922017', 'step': 4237, 'epoch': 2} +{'type': 'loss', 'content': 0.013529693707823753, 'timestamp': '2025-09-10 02:47:01.924522', 'step': 4238, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:47:01.978957', 'step': 4238, 'epoch': 2} +{'type': 'loss', 'content': 0.016862807795405388, 'timestamp': '2025-09-10 02:47:01.981286', 'step': 4239, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:47:02.041957', 'step': 4239, 'epoch': 2} +{'type': 'loss', 'content': 0.0025355503894388676, 'timestamp': '2025-09-10 02:47:02.053446', 'step': 4240, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:47:02.106953', 'step': 4240, 'epoch': 2} +{'type': 'loss', 'content': 0.010484575293958187, 'timestamp': '2025-09-10 02:47:02.109049', 'step': 4241, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:47:02.162184', 'step': 4241, 'epoch': 2} +{'type': 'loss', 'content': 0.005992446094751358, 'timestamp': '2025-09-10 02:47:02.164390', 'step': 4242, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:47:02.219482', 'step': 4242, 'epoch': 2} +{'type': 'loss', 'content': 0.020115068182349205, 'timestamp': '2025-09-10 02:47:02.229278', 'step': 4243, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:47:02.283157', 'step': 4243, 'epoch': 2} +{'type': 'loss', 'content': 0.003577465657144785, 'timestamp': '2025-09-10 02:47:02.289233', 'step': 4244, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:47:02.342126', 'step': 4244, 'epoch': 2} +{'type': 'loss', 'content': 0.003213457064703107, 'timestamp': '2025-09-10 02:47:02.344363', 'step': 4245, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:47:02.397029', 'step': 4245, 'epoch': 2} +{'type': 'loss', 'content': 0.013622616417706013, 'timestamp': '2025-09-10 02:47:02.400079', 'step': 4246, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:47:02.453550', 'step': 4246, 'epoch': 2} +{'type': 'loss', 'content': 0.0036872965283691883, 'timestamp': '2025-09-10 02:47:02.455910', 'step': 4247, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:47:02.509963', 'step': 4247, 'epoch': 2} +{'type': 'loss', 'content': 0.002753880573436618, 'timestamp': '2025-09-10 02:47:02.516126', 'step': 4248, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:47:02.574157', 'step': 4248, 'epoch': 2} +{'type': 'loss', 'content': 0.0028752887155860662, 'timestamp': '2025-09-10 02:47:02.580153', 'step': 4249, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:47:02.635160', 'step': 4249, 'epoch': 2} +{'type': 'loss', 'content': 0.001357552013359964, 'timestamp': '2025-09-10 02:47:02.637590', 'step': 4250, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 656], 'flops': 13120079713856.0}, 'timestamp': '2025-09-10 02:47:02.734227', 'step': 4250, 'epoch': 2} +{'type': 'loss', 'content': 0.006518196314573288, 'timestamp': '2025-09-10 02:47:02.752726', 'step': 4251, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:47:02.807718', 'step': 4251, 'epoch': 2} +{'type': 'loss', 'content': 0.0052061183378100395, 'timestamp': '2025-09-10 02:47:02.815701', 'step': 4252, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:47:02.869328', 'step': 4252, 'epoch': 2} +{'type': 'loss', 'content': 0.0058479164727032185, 'timestamp': '2025-09-10 02:47:02.871672', 'step': 4253, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-09-10 02:47:02.940033', 'step': 4253, 'epoch': 2} +{'type': 'loss', 'content': 0.0052193403244018555, 'timestamp': '2025-09-10 02:47:02.952642', 'step': 4254, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:47:03.006588', 'step': 4254, 'epoch': 2} +{'type': 'loss', 'content': 0.0011164784664288163, 'timestamp': '2025-09-10 02:47:03.011760', 'step': 4255, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:47:03.065292', 'step': 4255, 'epoch': 2} +{'type': 'loss', 'content': 0.003421362955123186, 'timestamp': '2025-09-10 02:47:03.071359', 'step': 4256, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:47:03.124053', 'step': 4256, 'epoch': 2} +{'type': 'loss', 'content': 0.00365271745249629, 'timestamp': '2025-09-10 02:47:03.134274', 'step': 4257, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-09-10 02:47:03.202031', 'step': 4257, 'epoch': 2} +{'type': 'loss', 'content': 0.0006833566003479064, 'timestamp': '2025-09-10 02:47:03.212936', 'step': 4258, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:47:03.266836', 'step': 4258, 'epoch': 2} +{'type': 'loss', 'content': 0.0012600627960637212, 'timestamp': '2025-09-10 02:47:03.276449', 'step': 4259, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:47:03.329886', 'step': 4259, 'epoch': 2} +{'type': 'loss', 'content': 0.00101366825401783, 'timestamp': '2025-09-10 02:47:03.339043', 'step': 4260, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:47:03.398187', 'step': 4260, 'epoch': 2} +{'type': 'loss', 'content': 0.00017269121599383652, 'timestamp': '2025-09-10 02:47:03.409705', 'step': 4261, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:47:03.468023', 'step': 4261, 'epoch': 2} +{'type': 'loss', 'content': 0.008999736979603767, 'timestamp': '2025-09-10 02:47:03.470330', 'step': 4262, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:47:03.527535', 'step': 4262, 'epoch': 2} +{'type': 'loss', 'content': 0.004750747699290514, 'timestamp': '2025-09-10 02:47:03.530025', 'step': 4263, 'epoch': 2} +{'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 736], 'batch_size': 8, 'flops': 14691612894976}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10859018244352}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 624], 'batch_size': 8, 'flops': 12455932682112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11497784019456}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8623338031488}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8623338031488}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10859018244352}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 656], 'batch_size': 8, 'flops': 13094698457216}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 560], 'batch_size': 8, 'flops': 11178401131904}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11497784019456}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 1168], 'batch_size': 8, 'flops': 23314950858880}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 640], 'batch_size': 8, 'flops': 12775315569664}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [6, 208], 'batch_size': 8, 'flops': 4151977605760}], 'timestamp': '2025-09-10 02:47:20.555136', 'step': 4263, 'epoch': 2} +{'type': 'pplx', 'content': 25644173.16284924, 'timestamp': '2025-09-10 02:47:20.557910', 'step': 4263, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:47:20.612045', 'step': 4263, 'epoch': 2} +{'type': 'loss', 'content': 0.0016500626225024462, 'timestamp': '2025-09-10 02:47:20.621609', 'step': 4264, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:47:20.674918', 'step': 4264, 'epoch': 2} +{'type': 'loss', 'content': 0.004900079686194658, 'timestamp': '2025-09-10 02:47:20.680534', 'step': 4265, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:47:20.733324', 'step': 4265, 'epoch': 2} +{'type': 'loss', 'content': 0.0019907455425709486, 'timestamp': '2025-09-10 02:47:20.735673', 'step': 4266, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:47:20.789127', 'step': 4266, 'epoch': 2} +{'type': 'loss', 'content': 0.0002901068946812302, 'timestamp': '2025-09-10 02:47:20.791404', 'step': 4267, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:47:20.844339', 'step': 4267, 'epoch': 2} +{'type': 'loss', 'content': 0.001220567268319428, 'timestamp': '2025-09-10 02:47:20.851390', 'step': 4268, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:47:20.908307', 'step': 4268, 'epoch': 2} +{'type': 'loss', 'content': 0.0025715783704072237, 'timestamp': '2025-09-10 02:47:20.919513', 'step': 4269, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:47:20.974498', 'step': 4269, 'epoch': 2} +{'type': 'loss', 'content': 0.0004607465525623411, 'timestamp': '2025-09-10 02:47:20.976877', 'step': 4270, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:47:21.030611', 'step': 4270, 'epoch': 2} +{'type': 'loss', 'content': 0.013449196703732014, 'timestamp': '2025-09-10 02:47:21.032653', 'step': 4271, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:47:21.085190', 'step': 4271, 'epoch': 2} +{'type': 'loss', 'content': 0.003410050878301263, 'timestamp': '2025-09-10 02:47:21.091109', 'step': 4272, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:47:21.149471', 'step': 4272, 'epoch': 2} +{'type': 'loss', 'content': 0.0329984612762928, 'timestamp': '2025-09-10 02:47:21.161083', 'step': 4273, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:47:21.215238', 'step': 4273, 'epoch': 2} +{'type': 'loss', 'content': 0.014055393636226654, 'timestamp': '2025-09-10 02:47:21.217820', 'step': 4274, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:47:21.270713', 'step': 4274, 'epoch': 2} +{'type': 'loss', 'content': 0.008442390710115433, 'timestamp': '2025-09-10 02:47:21.272802', 'step': 4275, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:47:21.325573', 'step': 4275, 'epoch': 2} +{'type': 'loss', 'content': 0.0004875912272837013, 'timestamp': '2025-09-10 02:47:21.332870', 'step': 4276, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:47:21.384952', 'step': 4276, 'epoch': 2} +{'type': 'loss', 'content': 0.0014641110319644213, 'timestamp': '2025-09-10 02:47:21.386827', 'step': 4277, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:47:21.439352', 'step': 4277, 'epoch': 2} +{'type': 'loss', 'content': 0.0051191323436796665, 'timestamp': '2025-09-10 02:47:21.442265', 'step': 4278, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-09-10 02:47:21.503120', 'step': 4278, 'epoch': 2} +{'type': 'loss', 'content': 0.0016277057584375143, 'timestamp': '2025-09-10 02:47:21.514273', 'step': 4279, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:47:21.567774', 'step': 4279, 'epoch': 2} +{'type': 'loss', 'content': 0.00355696864426136, 'timestamp': '2025-09-10 02:47:21.573852', 'step': 4280, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:47:21.626882', 'step': 4280, 'epoch': 2} +{'type': 'loss', 'content': 0.0005710911937057972, 'timestamp': '2025-09-10 02:47:21.629154', 'step': 4281, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:47:21.683330', 'step': 4281, 'epoch': 2} +{'type': 'loss', 'content': 0.0006681772647425532, 'timestamp': '2025-09-10 02:47:21.692942', 'step': 4282, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:47:21.748994', 'step': 4282, 'epoch': 2} +{'type': 'loss', 'content': 0.0033257752656936646, 'timestamp': '2025-09-10 02:47:21.751506', 'step': 4283, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:47:21.804798', 'step': 4283, 'epoch': 2} +{'type': 'loss', 'content': 0.0073212082497775555, 'timestamp': '2025-09-10 02:47:21.810623', 'step': 4284, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:47:21.868002', 'step': 4284, 'epoch': 2} +{'type': 'loss', 'content': 0.005071139428764582, 'timestamp': '2025-09-10 02:47:21.872237', 'step': 4285, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:47:21.929796', 'step': 4285, 'epoch': 2} +{'type': 'loss', 'content': 0.012945490889251232, 'timestamp': '2025-09-10 02:47:21.939425', 'step': 4286, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:47:22.006140', 'step': 4286, 'epoch': 2} +{'type': 'loss', 'content': 0.021135341376066208, 'timestamp': '2025-09-10 02:47:22.015930', 'step': 4287, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:47:22.077904', 'step': 4287, 'epoch': 2} +{'type': 'loss', 'content': 9.846382454270497e-05, 'timestamp': '2025-09-10 02:47:22.085323', 'step': 4288, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:47:22.138448', 'step': 4288, 'epoch': 2} +{'type': 'loss', 'content': 0.000921298167668283, 'timestamp': '2025-09-10 02:47:22.146747', 'step': 4289, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:47:22.205334', 'step': 4289, 'epoch': 2} +{'type': 'loss', 'content': 0.003444183384999633, 'timestamp': '2025-09-10 02:47:22.212005', 'step': 4290, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:47:22.268199', 'step': 4290, 'epoch': 2} +{'type': 'loss', 'content': 0.04066583886742592, 'timestamp': '2025-09-10 02:47:22.274795', 'step': 4291, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:47:22.331016', 'step': 4291, 'epoch': 2} +{'type': 'loss', 'content': 0.00016234882059507072, 'timestamp': '2025-09-10 02:47:22.336686', 'step': 4292, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:47:22.389308', 'step': 4292, 'epoch': 2} +{'type': 'loss', 'content': 0.006329437252134085, 'timestamp': '2025-09-10 02:47:22.392343', 'step': 4293, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:47:22.446697', 'step': 4293, 'epoch': 2} +{'type': 'loss', 'content': 0.00017065482097677886, 'timestamp': '2025-09-10 02:47:22.456233', 'step': 4294, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:47:22.511983', 'step': 4294, 'epoch': 2} +{'type': 'loss', 'content': 0.0008375744218938053, 'timestamp': '2025-09-10 02:47:22.521622', 'step': 4295, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:47:22.576141', 'step': 4295, 'epoch': 2} +{'type': 'loss', 'content': 0.009327185340225697, 'timestamp': '2025-09-10 02:47:22.596244', 'step': 4296, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:47:22.649963', 'step': 4296, 'epoch': 2} +{'type': 'loss', 'content': 0.0028118512127548456, 'timestamp': '2025-09-10 02:47:22.652330', 'step': 4297, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:47:22.705789', 'step': 4297, 'epoch': 2} +{'type': 'loss', 'content': 0.00021826619922649115, 'timestamp': '2025-09-10 02:47:22.708367', 'step': 4298, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:47:22.762756', 'step': 4298, 'epoch': 2} +{'type': 'loss', 'content': 0.02473120577633381, 'timestamp': '2025-09-10 02:47:22.765033', 'step': 4299, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:47:22.824101', 'step': 4299, 'epoch': 2} +{'type': 'loss', 'content': 0.010418480262160301, 'timestamp': '2025-09-10 02:47:22.830332', 'step': 4300, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:47:22.885674', 'step': 4300, 'epoch': 2} +{'type': 'loss', 'content': 0.0022602102253586054, 'timestamp': '2025-09-10 02:47:22.892201', 'step': 4301, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:47:22.947884', 'step': 4301, 'epoch': 2} +{'type': 'loss', 'content': 0.0003526568762026727, 'timestamp': '2025-09-10 02:47:22.950735', 'step': 4302, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:47:23.004783', 'step': 4302, 'epoch': 2} +{'type': 'loss', 'content': 0.0012839640257880092, 'timestamp': '2025-09-10 02:47:23.011225', 'step': 4303, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:47:23.069550', 'step': 4303, 'epoch': 2} +{'type': 'loss', 'content': 0.00187406106851995, 'timestamp': '2025-09-10 02:47:23.079918', 'step': 4304, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:47:23.133427', 'step': 4304, 'epoch': 2} +{'type': 'loss', 'content': 0.0009665919351391494, 'timestamp': '2025-09-10 02:47:23.138522', 'step': 4305, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:47:23.201381', 'step': 4305, 'epoch': 2} +{'type': 'loss', 'content': 0.00036055262899026275, 'timestamp': '2025-09-10 02:47:23.203984', 'step': 4306, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:47:23.257178', 'step': 4306, 'epoch': 2} +{'type': 'loss', 'content': 0.004071732517331839, 'timestamp': '2025-09-10 02:47:23.272067', 'step': 4307, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:47:23.335915', 'step': 4307, 'epoch': 2} +{'type': 'loss', 'content': 0.008712194859981537, 'timestamp': '2025-09-10 02:47:23.343969', 'step': 4308, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:47:23.398148', 'step': 4308, 'epoch': 2} +{'type': 'loss', 'content': 0.001939832349307835, 'timestamp': '2025-09-10 02:47:23.402555', 'step': 4309, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:47:23.460393', 'step': 4309, 'epoch': 2} +{'type': 'loss', 'content': 0.0015591054689139128, 'timestamp': '2025-09-10 02:47:23.467337', 'step': 4310, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:47:23.521567', 'step': 4310, 'epoch': 2} +{'type': 'loss', 'content': 0.0003225362452212721, 'timestamp': '2025-09-10 02:47:23.524894', 'step': 4311, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:47:23.580010', 'step': 4311, 'epoch': 2} +{'type': 'loss', 'content': 0.00046219161595217884, 'timestamp': '2025-09-10 02:47:23.587617', 'step': 4312, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:47:23.639698', 'step': 4312, 'epoch': 2} +{'type': 'loss', 'content': 0.0006220670184120536, 'timestamp': '2025-09-10 02:47:23.642543', 'step': 4313, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-09-10 02:47:23.703670', 'step': 4313, 'epoch': 2} +{'type': 'loss', 'content': 0.008913377299904823, 'timestamp': '2025-09-10 02:47:23.714617', 'step': 4314, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:47:23.772255', 'step': 4314, 'epoch': 2} +{'type': 'loss', 'content': 0.005080437753349543, 'timestamp': '2025-09-10 02:47:23.782675', 'step': 4315, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:47:23.835501', 'step': 4315, 'epoch': 2} +{'type': 'loss', 'content': 9.659978240961209e-05, 'timestamp': '2025-09-10 02:47:23.841426', 'step': 4316, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:47:23.894008', 'step': 4316, 'epoch': 2} +{'type': 'loss', 'content': 0.001103831920772791, 'timestamp': '2025-09-10 02:47:23.904154', 'step': 4317, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:47:23.957041', 'step': 4317, 'epoch': 2} +{'type': 'loss', 'content': 9.482850873610005e-05, 'timestamp': '2025-09-10 02:47:23.959203', 'step': 4318, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:47:24.011631', 'step': 4318, 'epoch': 2} +{'type': 'loss', 'content': 0.0010703227017074823, 'timestamp': '2025-09-10 02:47:24.014662', 'step': 4319, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:47:24.067439', 'step': 4319, 'epoch': 2} +{'type': 'loss', 'content': 0.0008624103502370417, 'timestamp': '2025-09-10 02:47:24.073101', 'step': 4320, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:47:24.125053', 'step': 4320, 'epoch': 2} +{'type': 'loss', 'content': 0.0016158433863893151, 'timestamp': '2025-09-10 02:47:24.131764', 'step': 4321, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:47:24.184882', 'step': 4321, 'epoch': 2} +{'type': 'loss', 'content': 0.0019937059842050076, 'timestamp': '2025-09-10 02:47:24.187111', 'step': 4322, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:47:24.239897', 'step': 4322, 'epoch': 2} +{'type': 'loss', 'content': 0.0021892760414630175, 'timestamp': '2025-09-10 02:47:24.242117', 'step': 4323, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:47:24.296569', 'step': 4323, 'epoch': 2} +{'type': 'loss', 'content': 0.0002153662935597822, 'timestamp': '2025-09-10 02:47:24.307147', 'step': 4324, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:47:24.359664', 'step': 4324, 'epoch': 2} +{'type': 'loss', 'content': 0.00015940495359245688, 'timestamp': '2025-09-10 02:47:24.361676', 'step': 4325, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:47:24.414472', 'step': 4325, 'epoch': 2} +{'type': 'loss', 'content': 0.004014693666249514, 'timestamp': '2025-09-10 02:47:24.416673', 'step': 4326, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:47:24.470093', 'step': 4326, 'epoch': 2} +{'type': 'loss', 'content': 0.0009683924727141857, 'timestamp': '2025-09-10 02:47:24.472239', 'step': 4327, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:47:24.525324', 'step': 4327, 'epoch': 2} +{'type': 'loss', 'content': 0.0016802401514723897, 'timestamp': '2025-09-10 02:47:24.531298', 'step': 4328, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:47:24.583683', 'step': 4328, 'epoch': 2} +{'type': 'loss', 'content': 0.007548587862402201, 'timestamp': '2025-09-10 02:47:24.593898', 'step': 4329, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:47:24.647445', 'step': 4329, 'epoch': 2} +{'type': 'loss', 'content': 0.0004307155904825777, 'timestamp': '2025-09-10 02:47:24.649541', 'step': 4330, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-09-10 02:47:24.711439', 'step': 4330, 'epoch': 2} +{'type': 'loss', 'content': 0.0032655613031238317, 'timestamp': '2025-09-10 02:47:24.722600', 'step': 4331, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:47:24.777002', 'step': 4331, 'epoch': 2} +{'type': 'loss', 'content': 0.02425667643547058, 'timestamp': '2025-09-10 02:47:24.783149', 'step': 4332, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:47:24.835310', 'step': 4332, 'epoch': 2} +{'type': 'loss', 'content': 0.002336332807317376, 'timestamp': '2025-09-10 02:47:24.838283', 'step': 4333, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-09-10 02:47:24.907017', 'step': 4333, 'epoch': 2} +{'type': 'loss', 'content': 0.0016943739028647542, 'timestamp': '2025-09-10 02:47:24.919743', 'step': 4334, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:47:24.972647', 'step': 4334, 'epoch': 2} +{'type': 'loss', 'content': 0.05163237825036049, 'timestamp': '2025-09-10 02:47:24.975635', 'step': 4335, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:47:25.028542', 'step': 4335, 'epoch': 2} +{'type': 'loss', 'content': 0.00012770164175890386, 'timestamp': '2025-09-10 02:47:25.035960', 'step': 4336, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:47:25.087963', 'step': 4336, 'epoch': 2} +{'type': 'loss', 'content': 0.0023083502892404795, 'timestamp': '2025-09-10 02:47:25.091059', 'step': 4337, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:47:25.144155', 'step': 4337, 'epoch': 2} +{'type': 'loss', 'content': 0.056472379714250565, 'timestamp': '2025-09-10 02:47:25.146165', 'step': 4338, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:47:25.198933', 'step': 4338, 'epoch': 2} +{'type': 'loss', 'content': 0.00045358770876191556, 'timestamp': '2025-09-10 02:47:25.200947', 'step': 4339, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 448], 'flops': 8960054460160.0}, 'timestamp': '2025-09-10 02:47:25.271774', 'step': 4339, 'epoch': 2} +{'type': 'loss', 'content': 0.0016407015500590205, 'timestamp': '2025-09-10 02:47:25.285370', 'step': 4340, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:47:25.338801', 'step': 4340, 'epoch': 2} +{'type': 'loss', 'content': 0.02011013962328434, 'timestamp': '2025-09-10 02:47:25.341070', 'step': 4341, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:47:25.394716', 'step': 4341, 'epoch': 2} +{'type': 'loss', 'content': 0.004920617211610079, 'timestamp': '2025-09-10 02:47:25.397148', 'step': 4342, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:47:25.450058', 'step': 4342, 'epoch': 2} +{'type': 'loss', 'content': 0.01722615212202072, 'timestamp': '2025-09-10 02:47:25.452234', 'step': 4343, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:47:25.505444', 'step': 4343, 'epoch': 2} +{'type': 'loss', 'content': 0.028729308396577835, 'timestamp': '2025-09-10 02:47:25.511093', 'step': 4344, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:47:25.564881', 'step': 4344, 'epoch': 2} +{'type': 'loss', 'content': 0.00216209446080029, 'timestamp': '2025-09-10 02:47:25.566984', 'step': 4345, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:47:25.619792', 'step': 4345, 'epoch': 2} +{'type': 'loss', 'content': 0.00043724634451791644, 'timestamp': '2025-09-10 02:47:25.622083', 'step': 4346, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:47:25.675573', 'step': 4346, 'epoch': 2} +{'type': 'loss', 'content': 0.02942570671439171, 'timestamp': '2025-09-10 02:47:25.682206', 'step': 4347, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:47:25.735464', 'step': 4347, 'epoch': 2} +{'type': 'loss', 'content': 0.0001083740935428068, 'timestamp': '2025-09-10 02:47:25.741207', 'step': 4348, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:47:25.794934', 'step': 4348, 'epoch': 2} +{'type': 'loss', 'content': 0.001994392601773143, 'timestamp': '2025-09-10 02:47:25.805431', 'step': 4349, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:47:25.858525', 'step': 4349, 'epoch': 2} +{'type': 'loss', 'content': 0.0007920349016785622, 'timestamp': '2025-09-10 02:47:25.860944', 'step': 4350, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:47:25.914208', 'step': 4350, 'epoch': 2} +{'type': 'loss', 'content': 9.946012869477272e-05, 'timestamp': '2025-09-10 02:47:25.916488', 'step': 4351, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:47:25.970165', 'step': 4351, 'epoch': 2} +{'type': 'loss', 'content': 0.00013539181964006275, 'timestamp': '2025-09-10 02:47:25.976140', 'step': 4352, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:47:26.028887', 'step': 4352, 'epoch': 2} +{'type': 'loss', 'content': 0.0017476447392255068, 'timestamp': '2025-09-10 02:47:26.031425', 'step': 4353, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:47:26.084105', 'step': 4353, 'epoch': 2} +{'type': 'loss', 'content': 0.00012519230949692428, 'timestamp': '2025-09-10 02:47:26.087412', 'step': 4354, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:47:26.141005', 'step': 4354, 'epoch': 2} +{'type': 'loss', 'content': 0.00020083349954802543, 'timestamp': '2025-09-10 02:47:26.147558', 'step': 4355, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:47:26.200371', 'step': 4355, 'epoch': 2} +{'type': 'loss', 'content': 0.0002472948399372399, 'timestamp': '2025-09-10 02:47:26.206234', 'step': 4356, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:47:26.259881', 'step': 4356, 'epoch': 2} +{'type': 'loss', 'content': 0.002402596641331911, 'timestamp': '2025-09-10 02:47:26.270159', 'step': 4357, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:47:26.325044', 'step': 4357, 'epoch': 2} +{'type': 'loss', 'content': 0.002291470766067505, 'timestamp': '2025-09-10 02:47:26.334835', 'step': 4358, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:47:26.388028', 'step': 4358, 'epoch': 2} +{'type': 'loss', 'content': 0.0004171359760221094, 'timestamp': '2025-09-10 02:47:26.390157', 'step': 4359, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:47:26.443648', 'step': 4359, 'epoch': 2} +{'type': 'loss', 'content': 0.000416089576901868, 'timestamp': '2025-09-10 02:47:26.449400', 'step': 4360, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:47:26.501770', 'step': 4360, 'epoch': 2} +{'type': 'loss', 'content': 0.012556105852127075, 'timestamp': '2025-09-10 02:47:26.504180', 'step': 4361, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:47:26.558772', 'step': 4361, 'epoch': 2} +{'type': 'loss', 'content': 0.0003699475491885096, 'timestamp': '2025-09-10 02:47:26.568567', 'step': 4362, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:47:26.621835', 'step': 4362, 'epoch': 2} +{'type': 'loss', 'content': 0.0029870334547013044, 'timestamp': '2025-09-10 02:47:26.624027', 'step': 4363, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:47:26.677227', 'step': 4363, 'epoch': 2} +{'type': 'loss', 'content': 0.008871073834598064, 'timestamp': '2025-09-10 02:47:26.682959', 'step': 4364, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:47:26.735316', 'step': 4364, 'epoch': 2} +{'type': 'loss', 'content': 0.0008695862488821149, 'timestamp': '2025-09-10 02:47:26.743558', 'step': 4365, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:47:26.796770', 'step': 4365, 'epoch': 2} +{'type': 'loss', 'content': 0.004937638994306326, 'timestamp': '2025-09-10 02:47:26.803292', 'step': 4366, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:47:26.856147', 'step': 4366, 'epoch': 2} +{'type': 'loss', 'content': 0.00038243673043325543, 'timestamp': '2025-09-10 02:47:26.859179', 'step': 4367, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:47:26.912932', 'step': 4367, 'epoch': 2} +{'type': 'loss', 'content': 0.00197978294454515, 'timestamp': '2025-09-10 02:47:26.918699', 'step': 4368, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:47:26.971602', 'step': 4368, 'epoch': 2} +{'type': 'loss', 'content': 0.0035370823461562395, 'timestamp': '2025-09-10 02:47:26.974181', 'step': 4369, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:47:27.027660', 'step': 4369, 'epoch': 2} +{'type': 'loss', 'content': 0.02326894737780094, 'timestamp': '2025-09-10 02:47:27.030161', 'step': 4370, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:47:27.083290', 'step': 4370, 'epoch': 2} +{'type': 'loss', 'content': 0.08179046213626862, 'timestamp': '2025-09-10 02:47:27.085755', 'step': 4371, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:47:27.138643', 'step': 4371, 'epoch': 2} +{'type': 'loss', 'content': 0.00025862836628220975, 'timestamp': '2025-09-10 02:47:27.144786', 'step': 4372, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:47:27.197129', 'step': 4372, 'epoch': 2} +{'type': 'loss', 'content': 0.0045958468690514565, 'timestamp': '2025-09-10 02:47:27.200182', 'step': 4373, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:47:27.253033', 'step': 4373, 'epoch': 2} +{'type': 'loss', 'content': 0.011955601163208485, 'timestamp': '2025-09-10 02:47:27.255362', 'step': 4374, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:47:27.309125', 'step': 4374, 'epoch': 2} +{'type': 'loss', 'content': 0.000381296529667452, 'timestamp': '2025-09-10 02:47:27.312199', 'step': 4375, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:47:27.365417', 'step': 4375, 'epoch': 2} +{'type': 'loss', 'content': 0.005038236267864704, 'timestamp': '2025-09-10 02:47:27.371676', 'step': 4376, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:47:27.424881', 'step': 4376, 'epoch': 2} +{'type': 'loss', 'content': 0.005474306643009186, 'timestamp': '2025-09-10 02:47:27.427818', 'step': 4377, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:47:27.481370', 'step': 4377, 'epoch': 2} +{'type': 'loss', 'content': 0.0186654981225729, 'timestamp': '2025-09-10 02:47:27.489443', 'step': 4378, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:47:27.550403', 'step': 4378, 'epoch': 2} +{'type': 'loss', 'content': 0.01948568783700466, 'timestamp': '2025-09-10 02:47:27.561184', 'step': 4379, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:47:27.622433', 'step': 4379, 'epoch': 2} +{'type': 'loss', 'content': 0.02189786732196808, 'timestamp': '2025-09-10 02:47:27.633944', 'step': 4380, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:47:27.687133', 'step': 4380, 'epoch': 2} +{'type': 'loss', 'content': 0.000145500831422396, 'timestamp': '2025-09-10 02:47:27.689556', 'step': 4381, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:47:27.750020', 'step': 4381, 'epoch': 2} +{'type': 'loss', 'content': 0.0005379405338317156, 'timestamp': '2025-09-10 02:47:27.760798', 'step': 4382, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:47:27.814708', 'step': 4382, 'epoch': 2} +{'type': 'loss', 'content': 0.0008759471820667386, 'timestamp': '2025-09-10 02:47:27.817936', 'step': 4383, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:47:27.872522', 'step': 4383, 'epoch': 2} +{'type': 'loss', 'content': 0.0036357541102916002, 'timestamp': '2025-09-10 02:47:27.878528', 'step': 4384, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:47:27.931721', 'step': 4384, 'epoch': 2} +{'type': 'loss', 'content': 0.011262218467891216, 'timestamp': '2025-09-10 02:47:27.934234', 'step': 4385, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:47:27.987688', 'step': 4385, 'epoch': 2} +{'type': 'loss', 'content': 0.00036622449988499284, 'timestamp': '2025-09-10 02:47:27.990213', 'step': 4386, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:47:28.043575', 'step': 4386, 'epoch': 2} +{'type': 'loss', 'content': 0.03118307515978813, 'timestamp': '2025-09-10 02:47:28.045842', 'step': 4387, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:47:28.098880', 'step': 4387, 'epoch': 2} +{'type': 'loss', 'content': 0.021011371165513992, 'timestamp': '2025-09-10 02:47:28.104789', 'step': 4388, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:47:28.161951', 'step': 4388, 'epoch': 2} +{'type': 'loss', 'content': 0.016629895195364952, 'timestamp': '2025-09-10 02:47:28.173193', 'step': 4389, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:47:28.226099', 'step': 4389, 'epoch': 2} +{'type': 'loss', 'content': 0.0039963508024811745, 'timestamp': '2025-09-10 02:47:28.229293', 'step': 4390, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:47:28.284261', 'step': 4390, 'epoch': 2} +{'type': 'loss', 'content': 0.005318841896951199, 'timestamp': '2025-09-10 02:47:28.294078', 'step': 4391, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:47:28.347708', 'step': 4391, 'epoch': 2} +{'type': 'loss', 'content': 0.0018943492323160172, 'timestamp': '2025-09-10 02:47:28.353552', 'step': 4392, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 528], 'flops': 10560064173120.0}, 'timestamp': '2025-09-10 02:47:28.432124', 'step': 4392, 'epoch': 2} +{'type': 'loss', 'content': 0.002766832709312439, 'timestamp': '2025-09-10 02:47:28.448630', 'step': 4393, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:47:28.502893', 'step': 4393, 'epoch': 2} +{'type': 'loss', 'content': 0.0033191312104463577, 'timestamp': '2025-09-10 02:47:28.505114', 'step': 4394, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:47:28.564329', 'step': 4394, 'epoch': 2} +{'type': 'loss', 'content': 0.00220515881665051, 'timestamp': '2025-09-10 02:47:28.574763', 'step': 4395, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:47:28.628308', 'step': 4395, 'epoch': 2} +{'type': 'loss', 'content': 0.002763511147350073, 'timestamp': '2025-09-10 02:47:28.634534', 'step': 4396, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:47:28.688040', 'step': 4396, 'epoch': 2} +{'type': 'loss', 'content': 0.04486337676644325, 'timestamp': '2025-09-10 02:47:28.690279', 'step': 4397, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:47:28.744037', 'step': 4397, 'epoch': 2} +{'type': 'loss', 'content': 0.006099449936300516, 'timestamp': '2025-09-10 02:47:28.746327', 'step': 4398, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-09-10 02:47:28.807899', 'step': 4398, 'epoch': 2} +{'type': 'loss', 'content': 0.005696186330169439, 'timestamp': '2025-09-10 02:47:28.818823', 'step': 4399, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:47:28.872588', 'step': 4399, 'epoch': 2} +{'type': 'loss', 'content': 0.0006501591997221112, 'timestamp': '2025-09-10 02:47:28.878378', 'step': 4400, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-09-10 02:47:28.944775', 'step': 4400, 'epoch': 2} +{'type': 'loss', 'content': 0.001073510036803782, 'timestamp': '2025-09-10 02:47:28.958395', 'step': 4401, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:47:29.017340', 'step': 4401, 'epoch': 2} +{'type': 'loss', 'content': 0.0008090257761068642, 'timestamp': '2025-09-10 02:47:29.027786', 'step': 4402, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:47:29.081620', 'step': 4402, 'epoch': 2} +{'type': 'loss', 'content': 0.0020783047657459974, 'timestamp': '2025-09-10 02:47:29.083927', 'step': 4403, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:47:29.136892', 'step': 4403, 'epoch': 2} +{'type': 'loss', 'content': 0.015986433252692223, 'timestamp': '2025-09-10 02:47:29.142730', 'step': 4404, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:47:29.195275', 'step': 4404, 'epoch': 2} +{'type': 'loss', 'content': 0.004745179321616888, 'timestamp': '2025-09-10 02:47:29.202061', 'step': 4405, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:47:29.255072', 'step': 4405, 'epoch': 2} +{'type': 'loss', 'content': 0.014310779049992561, 'timestamp': '2025-09-10 02:47:29.257431', 'step': 4406, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:47:29.310227', 'step': 4406, 'epoch': 2} +{'type': 'loss', 'content': 0.0026371285784989595, 'timestamp': '2025-09-10 02:47:29.312677', 'step': 4407, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:47:29.371048', 'step': 4407, 'epoch': 2} +{'type': 'loss', 'content': 0.04145417362451553, 'timestamp': '2025-09-10 02:47:29.382259', 'step': 4408, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:47:29.435075', 'step': 4408, 'epoch': 2} +{'type': 'loss', 'content': 0.006641158368438482, 'timestamp': '2025-09-10 02:47:29.437470', 'step': 4409, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:47:29.490578', 'step': 4409, 'epoch': 2} +{'type': 'loss', 'content': 0.0005713313585147262, 'timestamp': '2025-09-10 02:47:29.492923', 'step': 4410, 'epoch': 2} +{'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 736], 'batch_size': 8, 'flops': 14691612894976}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10859018244352}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 624], 'batch_size': 8, 'flops': 12455932682112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11497784019456}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8623338031488}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8623338031488}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10859018244352}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 656], 'batch_size': 8, 'flops': 13094698457216}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 560], 'batch_size': 8, 'flops': 11178401131904}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11497784019456}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 1168], 'batch_size': 8, 'flops': 23314950858880}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 640], 'batch_size': 8, 'flops': 12775315569664}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [6, 208], 'batch_size': 8, 'flops': 4151977605760}], 'timestamp': '2025-09-10 02:47:46.372443', 'step': 4410, 'epoch': 2} +{'type': 'pplx', 'content': 26986518.333128188, 'timestamp': '2025-09-10 02:47:46.375775', 'step': 4410, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:47:46.431427', 'step': 4410, 'epoch': 2} +{'type': 'loss', 'content': 0.00368632678873837, 'timestamp': '2025-09-10 02:47:46.436209', 'step': 4411, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:47:46.490376', 'step': 4411, 'epoch': 2} +{'type': 'loss', 'content': 0.0013429404934868217, 'timestamp': '2025-09-10 02:47:46.496991', 'step': 4412, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:47:46.552492', 'step': 4412, 'epoch': 2} +{'type': 'loss', 'content': 0.0072911703027784824, 'timestamp': '2025-09-10 02:47:46.554516', 'step': 4413, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:47:46.609303', 'step': 4413, 'epoch': 2} +{'type': 'loss', 'content': 0.013402380980551243, 'timestamp': '2025-09-10 02:47:46.618086', 'step': 4414, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:47:46.672426', 'step': 4414, 'epoch': 2} +{'type': 'loss', 'content': 0.004903607070446014, 'timestamp': '2025-09-10 02:47:46.678212', 'step': 4415, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:47:46.731758', 'step': 4415, 'epoch': 2} +{'type': 'loss', 'content': 0.009532332420349121, 'timestamp': '2025-09-10 02:47:46.738674', 'step': 4416, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:47:46.796297', 'step': 4416, 'epoch': 2} +{'type': 'loss', 'content': 0.0034129994455724955, 'timestamp': '2025-09-10 02:47:46.807498', 'step': 4417, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:47:46.862238', 'step': 4417, 'epoch': 2} +{'type': 'loss', 'content': 0.019939128309488297, 'timestamp': '2025-09-10 02:47:46.871800', 'step': 4418, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:47:46.926254', 'step': 4418, 'epoch': 2} +{'type': 'loss', 'content': 0.007876886054873466, 'timestamp': '2025-09-10 02:47:46.928562', 'step': 4419, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:47:46.981815', 'step': 4419, 'epoch': 2} +{'type': 'loss', 'content': 0.020741092041134834, 'timestamp': '2025-09-10 02:47:46.988171', 'step': 4420, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:47:47.041047', 'step': 4420, 'epoch': 2} +{'type': 'loss', 'content': 0.008140448480844498, 'timestamp': '2025-09-10 02:47:47.043534', 'step': 4421, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:47:47.096634', 'step': 4421, 'epoch': 2} +{'type': 'loss', 'content': 0.0026586491148918867, 'timestamp': '2025-09-10 02:47:47.101033', 'step': 4422, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:47:47.160595', 'step': 4422, 'epoch': 2} +{'type': 'loss', 'content': 0.0011719962349161506, 'timestamp': '2025-09-10 02:47:47.162787', 'step': 4423, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:47:47.216280', 'step': 4423, 'epoch': 2} +{'type': 'loss', 'content': 0.025557424873113632, 'timestamp': '2025-09-10 02:47:47.226716', 'step': 4424, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:47:47.279396', 'step': 4424, 'epoch': 2} +{'type': 'loss', 'content': 0.0019124869722872972, 'timestamp': '2025-09-10 02:47:47.289294', 'step': 4425, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:47:47.343599', 'step': 4425, 'epoch': 2} +{'type': 'loss', 'content': 0.00876543577760458, 'timestamp': '2025-09-10 02:47:47.345977', 'step': 4426, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:47:47.399377', 'step': 4426, 'epoch': 2} +{'type': 'loss', 'content': 0.0014942241832613945, 'timestamp': '2025-09-10 02:47:47.407427', 'step': 4427, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:47:47.460420', 'step': 4427, 'epoch': 2} +{'type': 'loss', 'content': 0.0005349894636310637, 'timestamp': '2025-09-10 02:47:47.466715', 'step': 4428, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:47:47.519601', 'step': 4428, 'epoch': 2} +{'type': 'loss', 'content': 0.004284702241420746, 'timestamp': '2025-09-10 02:47:47.521834', 'step': 4429, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:47:47.575073', 'step': 4429, 'epoch': 2} +{'type': 'loss', 'content': 0.027856361120939255, 'timestamp': '2025-09-10 02:47:47.577462', 'step': 4430, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:47:47.634791', 'step': 4430, 'epoch': 2} +{'type': 'loss', 'content': 0.001220623031258583, 'timestamp': '2025-09-10 02:47:47.641113', 'step': 4431, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:47:47.702690', 'step': 4431, 'epoch': 2} +{'type': 'loss', 'content': 0.017930852249264717, 'timestamp': '2025-09-10 02:47:47.714150', 'step': 4432, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:47:47.767014', 'step': 4432, 'epoch': 2} +{'type': 'loss', 'content': 0.00795585848391056, 'timestamp': '2025-09-10 02:47:47.769221', 'step': 4433, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:47:47.822118', 'step': 4433, 'epoch': 2} +{'type': 'loss', 'content': 0.000394886068534106, 'timestamp': '2025-09-10 02:47:47.824665', 'step': 4434, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:47:47.878050', 'step': 4434, 'epoch': 2} +{'type': 'loss', 'content': 0.001962206093594432, 'timestamp': '2025-09-10 02:47:47.880975', 'step': 4435, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:47:47.934274', 'step': 4435, 'epoch': 2} +{'type': 'loss', 'content': 0.004236937500536442, 'timestamp': '2025-09-10 02:47:47.940325', 'step': 4436, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 512], 'flops': 10240062230528.0}, 'timestamp': '2025-09-10 02:47:48.014896', 'step': 4436, 'epoch': 2} +{'type': 'loss', 'content': 0.0005876132054254413, 'timestamp': '2025-09-10 02:47:48.030310', 'step': 4437, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:47:48.084293', 'step': 4437, 'epoch': 2} +{'type': 'loss', 'content': 0.005827220622450113, 'timestamp': '2025-09-10 02:47:48.087091', 'step': 4438, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:47:48.140322', 'step': 4438, 'epoch': 2} +{'type': 'loss', 'content': 0.026568984612822533, 'timestamp': '2025-09-10 02:47:48.142936', 'step': 4439, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:47:48.196893', 'step': 4439, 'epoch': 2} +{'type': 'loss', 'content': 0.014371698722243309, 'timestamp': '2025-09-10 02:47:48.207233', 'step': 4440, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:47:48.260269', 'step': 4440, 'epoch': 2} +{'type': 'loss', 'content': 0.005732525605708361, 'timestamp': '2025-09-10 02:47:48.262402', 'step': 4441, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-09-10 02:47:48.328900', 'step': 4441, 'epoch': 2} +{'type': 'loss', 'content': 0.007794953417032957, 'timestamp': '2025-09-10 02:47:48.341135', 'step': 4442, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:47:48.394938', 'step': 4442, 'epoch': 2} +{'type': 'loss', 'content': 0.0007660404080525041, 'timestamp': '2025-09-10 02:47:48.398067', 'step': 4443, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:47:48.451564', 'step': 4443, 'epoch': 2} +{'type': 'loss', 'content': 0.0017210535006597638, 'timestamp': '2025-09-10 02:47:48.457591', 'step': 4444, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-09-10 02:47:48.517957', 'step': 4444, 'epoch': 2} +{'type': 'loss', 'content': 0.0009950220119208097, 'timestamp': '2025-09-10 02:47:48.529948', 'step': 4445, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:47:48.583845', 'step': 4445, 'epoch': 2} +{'type': 'loss', 'content': 0.04135027155280113, 'timestamp': '2025-09-10 02:47:48.586068', 'step': 4446, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:47:48.640996', 'step': 4446, 'epoch': 2} +{'type': 'loss', 'content': 0.027567947283387184, 'timestamp': '2025-09-10 02:47:48.650796', 'step': 4447, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-09-10 02:47:48.717830', 'step': 4447, 'epoch': 2} +{'type': 'loss', 'content': 0.0012885911855846643, 'timestamp': '2025-09-10 02:47:48.730870', 'step': 4448, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:47:48.783238', 'step': 4448, 'epoch': 2} +{'type': 'loss', 'content': 0.004028314258903265, 'timestamp': '2025-09-10 02:47:48.785386', 'step': 4449, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-09-10 02:47:48.852207', 'step': 4449, 'epoch': 2} +{'type': 'loss', 'content': 0.03624418377876282, 'timestamp': '2025-09-10 02:47:48.864420', 'step': 4450, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:47:48.917895', 'step': 4450, 'epoch': 2} +{'type': 'loss', 'content': 0.01659783348441124, 'timestamp': '2025-09-10 02:47:48.920762', 'step': 4451, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:47:48.974183', 'step': 4451, 'epoch': 2} +{'type': 'loss', 'content': 0.03107641078531742, 'timestamp': '2025-09-10 02:47:48.982814', 'step': 4452, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:47:49.035361', 'step': 4452, 'epoch': 2} +{'type': 'loss', 'content': 0.007877948693931103, 'timestamp': '2025-09-10 02:47:49.037827', 'step': 4453, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:47:49.091001', 'step': 4453, 'epoch': 2} +{'type': 'loss', 'content': 0.0068859620951116085, 'timestamp': '2025-09-10 02:47:49.097542', 'step': 4454, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:47:49.150756', 'step': 4454, 'epoch': 2} +{'type': 'loss', 'content': 0.001690584933385253, 'timestamp': '2025-09-10 02:47:49.153114', 'step': 4455, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:47:49.206808', 'step': 4455, 'epoch': 2} +{'type': 'loss', 'content': 0.0005641180323436856, 'timestamp': '2025-09-10 02:47:49.217219', 'step': 4456, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:47:49.270188', 'step': 4456, 'epoch': 2} +{'type': 'loss', 'content': 0.004897342063486576, 'timestamp': '2025-09-10 02:47:49.278386', 'step': 4457, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:47:49.331149', 'step': 4457, 'epoch': 2} +{'type': 'loss', 'content': 0.002342212712392211, 'timestamp': '2025-09-10 02:47:49.333383', 'step': 4458, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:47:49.386149', 'step': 4458, 'epoch': 2} +{'type': 'loss', 'content': 0.001183982822112739, 'timestamp': '2025-09-10 02:47:49.388411', 'step': 4459, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:47:49.442338', 'step': 4459, 'epoch': 2} +{'type': 'loss', 'content': 0.009718267247080803, 'timestamp': '2025-09-10 02:47:49.448215', 'step': 4460, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:47:49.500639', 'step': 4460, 'epoch': 2} +{'type': 'loss', 'content': 0.00479681883007288, 'timestamp': '2025-09-10 02:47:49.508892', 'step': 4461, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:47:49.567338', 'step': 4461, 'epoch': 2} +{'type': 'loss', 'content': 0.010085642337799072, 'timestamp': '2025-09-10 02:47:49.577781', 'step': 4462, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:47:49.631572', 'step': 4462, 'epoch': 2} +{'type': 'loss', 'content': 0.04318024963140488, 'timestamp': '2025-09-10 02:47:49.633912', 'step': 4463, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:47:49.692634', 'step': 4463, 'epoch': 2} +{'type': 'loss', 'content': 0.014200977981090546, 'timestamp': '2025-09-10 02:47:49.703836', 'step': 4464, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:47:49.756876', 'step': 4464, 'epoch': 2} +{'type': 'loss', 'content': 0.004874747712165117, 'timestamp': '2025-09-10 02:47:49.759193', 'step': 4465, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:47:49.812307', 'step': 4465, 'epoch': 2} +{'type': 'loss', 'content': 0.000795876607298851, 'timestamp': '2025-09-10 02:47:49.818982', 'step': 4466, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:47:49.872125', 'step': 4466, 'epoch': 2} +{'type': 'loss', 'content': 0.0014039665693417192, 'timestamp': '2025-09-10 02:47:49.880235', 'step': 4467, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:47:49.938844', 'step': 4467, 'epoch': 2} +{'type': 'loss', 'content': 0.004372471943497658, 'timestamp': '2025-09-10 02:47:49.950031', 'step': 4468, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:47:50.002702', 'step': 4468, 'epoch': 2} +{'type': 'loss', 'content': 0.004679134581238031, 'timestamp': '2025-09-10 02:47:50.008993', 'step': 4469, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:47:50.062183', 'step': 4469, 'epoch': 2} +{'type': 'loss', 'content': 0.003524158149957657, 'timestamp': '2025-09-10 02:47:50.065244', 'step': 4470, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:47:50.118988', 'step': 4470, 'epoch': 2} +{'type': 'loss', 'content': 0.0075031062588095665, 'timestamp': '2025-09-10 02:47:50.121151', 'step': 4471, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:47:50.174180', 'step': 4471, 'epoch': 2} +{'type': 'loss', 'content': 0.011620745994150639, 'timestamp': '2025-09-10 02:47:50.180186', 'step': 4472, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:47:50.232779', 'step': 4472, 'epoch': 2} +{'type': 'loss', 'content': 0.010053029283881187, 'timestamp': '2025-09-10 02:47:50.235072', 'step': 4473, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:47:50.288500', 'step': 4473, 'epoch': 2} +{'type': 'loss', 'content': 0.0026370359119027853, 'timestamp': '2025-09-10 02:47:50.290865', 'step': 4474, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:47:50.343895', 'step': 4474, 'epoch': 2} +{'type': 'loss', 'content': 0.014432991854846478, 'timestamp': '2025-09-10 02:47:50.345948', 'step': 4475, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:47:50.398608', 'step': 4475, 'epoch': 2} +{'type': 'loss', 'content': 0.011551310308277607, 'timestamp': '2025-09-10 02:47:50.404498', 'step': 4476, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:47:50.457243', 'step': 4476, 'epoch': 2} +{'type': 'loss', 'content': 0.017103832215070724, 'timestamp': '2025-09-10 02:47:50.467319', 'step': 4477, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:47:50.520885', 'step': 4477, 'epoch': 2} +{'type': 'loss', 'content': 0.005751727614551783, 'timestamp': '2025-09-10 02:47:50.523316', 'step': 4478, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:47:50.577079', 'step': 4478, 'epoch': 2} +{'type': 'loss', 'content': 0.008228392340242863, 'timestamp': '2025-09-10 02:47:50.579243', 'step': 4479, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:47:50.632165', 'step': 4479, 'epoch': 2} +{'type': 'loss', 'content': 0.0019313262309879065, 'timestamp': '2025-09-10 02:47:50.639600', 'step': 4480, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:47:50.692395', 'step': 4480, 'epoch': 2} +{'type': 'loss', 'content': 0.002656809287145734, 'timestamp': '2025-09-10 02:47:50.694557', 'step': 4481, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:47:50.747605', 'step': 4481, 'epoch': 2} +{'type': 'loss', 'content': 0.05354071408510208, 'timestamp': '2025-09-10 02:47:50.755806', 'step': 4482, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:47:50.809249', 'step': 4482, 'epoch': 2} +{'type': 'loss', 'content': 0.005342540796846151, 'timestamp': '2025-09-10 02:47:50.811794', 'step': 4483, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:47:50.865535', 'step': 4483, 'epoch': 2} +{'type': 'loss', 'content': 0.007268095854669809, 'timestamp': '2025-09-10 02:47:50.871591', 'step': 4484, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:47:50.924482', 'step': 4484, 'epoch': 2} +{'type': 'loss', 'content': 0.0012478465214371681, 'timestamp': '2025-09-10 02:47:50.931016', 'step': 4485, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:47:50.984044', 'step': 4485, 'epoch': 2} +{'type': 'loss', 'content': 0.01635042019188404, 'timestamp': '2025-09-10 02:47:50.986440', 'step': 4486, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:47:51.040629', 'step': 4486, 'epoch': 2} +{'type': 'loss', 'content': 0.008009052835404873, 'timestamp': '2025-09-10 02:47:51.042989', 'step': 4487, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:47:51.096545', 'step': 4487, 'epoch': 2} +{'type': 'loss', 'content': 0.009476982988417149, 'timestamp': '2025-09-10 02:47:51.103613', 'step': 4488, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:47:51.156347', 'step': 4488, 'epoch': 2} +{'type': 'loss', 'content': 0.0029068950098007917, 'timestamp': '2025-09-10 02:47:51.162506', 'step': 4489, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:47:51.217044', 'step': 4489, 'epoch': 2} +{'type': 'loss', 'content': 0.0013975396286696196, 'timestamp': '2025-09-10 02:47:51.226856', 'step': 4490, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:47:51.280672', 'step': 4490, 'epoch': 2} +{'type': 'loss', 'content': 0.0050372472032904625, 'timestamp': '2025-09-10 02:47:51.283128', 'step': 4491, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:47:51.337786', 'step': 4491, 'epoch': 2} +{'type': 'loss', 'content': 0.002294789766892791, 'timestamp': '2025-09-10 02:47:51.348077', 'step': 4492, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-09-10 02:47:51.414146', 'step': 4492, 'epoch': 2} +{'type': 'loss', 'content': 0.0007329504587687552, 'timestamp': '2025-09-10 02:47:51.427400', 'step': 4493, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-09-10 02:47:51.489710', 'step': 4493, 'epoch': 2} +{'type': 'loss', 'content': 0.010436500422656536, 'timestamp': '2025-09-10 02:47:51.500631', 'step': 4494, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:47:51.554762', 'step': 4494, 'epoch': 2} +{'type': 'loss', 'content': 0.0012955855345353484, 'timestamp': '2025-09-10 02:47:51.557071', 'step': 4495, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:47:51.615039', 'step': 4495, 'epoch': 2} +{'type': 'loss', 'content': 0.005135711748152971, 'timestamp': '2025-09-10 02:47:51.626232', 'step': 4496, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:47:51.680058', 'step': 4496, 'epoch': 2} +{'type': 'loss', 'content': 0.01051540020853281, 'timestamp': '2025-09-10 02:47:51.682602', 'step': 4497, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:47:51.736962', 'step': 4497, 'epoch': 2} +{'type': 'loss', 'content': 0.002059691585600376, 'timestamp': '2025-09-10 02:47:51.740034', 'step': 4498, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:47:51.794259', 'step': 4498, 'epoch': 2} +{'type': 'loss', 'content': 0.0026808774564415216, 'timestamp': '2025-09-10 02:47:51.800331', 'step': 4499, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:47:51.853902', 'step': 4499, 'epoch': 2} +{'type': 'loss', 'content': 0.0028993524610996246, 'timestamp': '2025-09-10 02:47:51.861017', 'step': 4500, 'epoch': 2} +{'type': 'info', 'content': 'Checkpoint saved at step 4500', 'timestamp': '2025-09-10 02:47:52.316495', 'step': 4500, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:47:52.375381', 'step': 4500, 'epoch': 2} +{'type': 'loss', 'content': 0.004151905421167612, 'timestamp': '2025-09-10 02:47:52.385609', 'step': 4501, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:47:52.441643', 'step': 4501, 'epoch': 2} +{'type': 'loss', 'content': 0.008255732245743275, 'timestamp': '2025-09-10 02:47:52.443853', 'step': 4502, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:47:52.498955', 'step': 4502, 'epoch': 2} +{'type': 'loss', 'content': 0.004449177533388138, 'timestamp': '2025-09-10 02:47:52.508718', 'step': 4503, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:47:52.562780', 'step': 4503, 'epoch': 2} +{'type': 'loss', 'content': 0.0021090207155793905, 'timestamp': '2025-09-10 02:47:52.568994', 'step': 4504, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:47:52.622805', 'step': 4504, 'epoch': 2} +{'type': 'loss', 'content': 0.0027156206779181957, 'timestamp': '2025-09-10 02:47:52.624931', 'step': 4505, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:47:52.678005', 'step': 4505, 'epoch': 2} +{'type': 'loss', 'content': 0.007852545008063316, 'timestamp': '2025-09-10 02:47:52.680663', 'step': 4506, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:47:52.733333', 'step': 4506, 'epoch': 2} +{'type': 'loss', 'content': 0.015431736595928669, 'timestamp': '2025-09-10 02:47:52.735688', 'step': 4507, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-09-10 02:47:52.797065', 'step': 4507, 'epoch': 2} +{'type': 'loss', 'content': 0.0024575102142989635, 'timestamp': '2025-09-10 02:47:52.808919', 'step': 4508, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:47:52.862394', 'step': 4508, 'epoch': 2} +{'type': 'loss', 'content': 0.0045385705307126045, 'timestamp': '2025-09-10 02:47:52.864641', 'step': 4509, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:47:52.919201', 'step': 4509, 'epoch': 2} +{'type': 'loss', 'content': 0.011222070083022118, 'timestamp': '2025-09-10 02:47:52.928979', 'step': 4510, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:47:52.982985', 'step': 4510, 'epoch': 2} +{'type': 'loss', 'content': 0.0019330204231664538, 'timestamp': '2025-09-10 02:47:52.985931', 'step': 4511, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:47:53.040068', 'step': 4511, 'epoch': 2} +{'type': 'loss', 'content': 0.0077409460209310055, 'timestamp': '2025-09-10 02:47:53.046634', 'step': 4512, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:47:53.099528', 'step': 4512, 'epoch': 2} +{'type': 'loss', 'content': 0.0020350662525743246, 'timestamp': '2025-09-10 02:47:53.102428', 'step': 4513, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-09-10 02:47:53.163653', 'step': 4513, 'epoch': 2} +{'type': 'loss', 'content': 0.008962714113295078, 'timestamp': '2025-09-10 02:47:53.174537', 'step': 4514, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:47:53.233054', 'step': 4514, 'epoch': 2} +{'type': 'loss', 'content': 0.005773603916168213, 'timestamp': '2025-09-10 02:47:53.235525', 'step': 4515, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:47:53.290255', 'step': 4515, 'epoch': 2} +{'type': 'loss', 'content': 0.005459383130073547, 'timestamp': '2025-09-10 02:47:53.296732', 'step': 4516, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:47:53.350979', 'step': 4516, 'epoch': 2} +{'type': 'loss', 'content': 0.0025894774589687586, 'timestamp': '2025-09-10 02:47:53.353330', 'step': 4517, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:47:53.413894', 'step': 4517, 'epoch': 2} +{'type': 'loss', 'content': 0.0017120438860729337, 'timestamp': '2025-09-10 02:47:53.423494', 'step': 4518, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:47:53.481695', 'step': 4518, 'epoch': 2} +{'type': 'loss', 'content': 0.028368426486849785, 'timestamp': '2025-09-10 02:47:53.483848', 'step': 4519, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:47:53.537150', 'step': 4519, 'epoch': 2} +{'type': 'loss', 'content': 0.00105516635812819, 'timestamp': '2025-09-10 02:47:53.543305', 'step': 4520, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-09-10 02:47:53.609654', 'step': 4520, 'epoch': 2} +{'type': 'loss', 'content': 0.007711261510848999, 'timestamp': '2025-09-10 02:47:53.621409', 'step': 4521, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:47:53.675707', 'step': 4521, 'epoch': 2} +{'type': 'loss', 'content': 0.0012892426457256079, 'timestamp': '2025-09-10 02:47:53.678340', 'step': 4522, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:47:53.733422', 'step': 4522, 'epoch': 2} +{'type': 'loss', 'content': 0.004775486886501312, 'timestamp': '2025-09-10 02:47:53.739571', 'step': 4523, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:47:53.800958', 'step': 4523, 'epoch': 2} +{'type': 'loss', 'content': 0.0003999762120656669, 'timestamp': '2025-09-10 02:47:53.808036', 'step': 4524, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:47:53.861900', 'step': 4524, 'epoch': 2} +{'type': 'loss', 'content': 0.0004046570393256843, 'timestamp': '2025-09-10 02:47:53.867983', 'step': 4525, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:47:53.921685', 'step': 4525, 'epoch': 2} +{'type': 'loss', 'content': 8.798386988928542e-05, 'timestamp': '2025-09-10 02:47:53.924127', 'step': 4526, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:47:53.979318', 'step': 4526, 'epoch': 2} +{'type': 'loss', 'content': 0.021559493616223335, 'timestamp': '2025-09-10 02:47:53.981596', 'step': 4527, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:47:54.036351', 'step': 4527, 'epoch': 2} +{'type': 'loss', 'content': 0.0032169115729629993, 'timestamp': '2025-09-10 02:47:54.042721', 'step': 4528, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:47:54.099529', 'step': 4528, 'epoch': 2} +{'type': 'loss', 'content': 0.032137926667928696, 'timestamp': '2025-09-10 02:47:54.101669', 'step': 4529, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:47:54.155066', 'step': 4529, 'epoch': 2} +{'type': 'loss', 'content': 0.012281983159482479, 'timestamp': '2025-09-10 02:47:54.157497', 'step': 4530, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:47:54.211395', 'step': 4530, 'epoch': 2} +{'type': 'loss', 'content': 0.0012799688847735524, 'timestamp': '2025-09-10 02:47:54.213781', 'step': 4531, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:47:54.267229', 'step': 4531, 'epoch': 2} +{'type': 'loss', 'content': 0.0017885107081383467, 'timestamp': '2025-09-10 02:47:54.276003', 'step': 4532, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:47:54.329359', 'step': 4532, 'epoch': 2} +{'type': 'loss', 'content': 0.023809725418686867, 'timestamp': '2025-09-10 02:47:54.339831', 'step': 4533, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:47:54.393415', 'step': 4533, 'epoch': 2} +{'type': 'loss', 'content': 0.007906203158199787, 'timestamp': '2025-09-10 02:47:54.397160', 'step': 4534, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:47:54.451596', 'step': 4534, 'epoch': 2} +{'type': 'loss', 'content': 0.0025535523891448975, 'timestamp': '2025-09-10 02:47:54.454018', 'step': 4535, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:47:54.506763', 'step': 4535, 'epoch': 2} +{'type': 'loss', 'content': 0.009981702081859112, 'timestamp': '2025-09-10 02:47:54.512813', 'step': 4536, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:47:54.569669', 'step': 4536, 'epoch': 2} +{'type': 'loss', 'content': 0.011019224300980568, 'timestamp': '2025-09-10 02:47:54.580813', 'step': 4537, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:47:54.635269', 'step': 4537, 'epoch': 2} +{'type': 'loss', 'content': 0.006600281689316034, 'timestamp': '2025-09-10 02:47:54.637584', 'step': 4538, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:47:54.691005', 'step': 4538, 'epoch': 2} +{'type': 'loss', 'content': 0.0014997757971286774, 'timestamp': '2025-09-10 02:47:54.693746', 'step': 4539, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:47:54.748166', 'step': 4539, 'epoch': 2} +{'type': 'loss', 'content': 0.03488391265273094, 'timestamp': '2025-09-10 02:47:54.754490', 'step': 4540, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:47:54.807900', 'step': 4540, 'epoch': 2} +{'type': 'loss', 'content': 0.03410203754901886, 'timestamp': '2025-09-10 02:47:54.814001', 'step': 4541, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:47:54.867686', 'step': 4541, 'epoch': 2} +{'type': 'loss', 'content': 0.005616106558591127, 'timestamp': '2025-09-10 02:47:54.869947', 'step': 4542, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:47:54.923566', 'step': 4542, 'epoch': 2} +{'type': 'loss', 'content': 0.002888392424210906, 'timestamp': '2025-09-10 02:47:54.925786', 'step': 4543, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-09-10 02:47:54.986884', 'step': 4543, 'epoch': 2} +{'type': 'loss', 'content': 0.003357083071023226, 'timestamp': '2025-09-10 02:47:54.998589', 'step': 4544, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:47:55.052090', 'step': 4544, 'epoch': 2} +{'type': 'loss', 'content': 0.001123654656112194, 'timestamp': '2025-09-10 02:47:55.054538', 'step': 4545, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:47:55.107650', 'step': 4545, 'epoch': 2} +{'type': 'loss', 'content': 0.016921380534768105, 'timestamp': '2025-09-10 02:47:55.109962', 'step': 4546, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:47:55.163094', 'step': 4546, 'epoch': 2} +{'type': 'loss', 'content': 0.00217248173430562, 'timestamp': '2025-09-10 02:47:55.165938', 'step': 4547, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 464], 'flops': 9280056402752.0}, 'timestamp': '2025-09-10 02:47:55.238831', 'step': 4547, 'epoch': 2} +{'type': 'loss', 'content': 0.025616373866796494, 'timestamp': '2025-09-10 02:47:55.253038', 'step': 4548, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:47:55.307367', 'step': 4548, 'epoch': 2} +{'type': 'loss', 'content': 0.0028280389960855246, 'timestamp': '2025-09-10 02:47:55.309594', 'step': 4549, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:47:55.363361', 'step': 4549, 'epoch': 2} +{'type': 'loss', 'content': 0.00040746951708570123, 'timestamp': '2025-09-10 02:47:55.365659', 'step': 4550, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:47:55.420347', 'step': 4550, 'epoch': 2} +{'type': 'loss', 'content': 0.00039629219099879265, 'timestamp': '2025-09-10 02:47:55.430149', 'step': 4551, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:47:55.484181', 'step': 4551, 'epoch': 2} +{'type': 'loss', 'content': 0.00037533658905886114, 'timestamp': '2025-09-10 02:47:55.490277', 'step': 4552, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:47:55.543180', 'step': 4552, 'epoch': 2} +{'type': 'loss', 'content': 0.002133527770638466, 'timestamp': '2025-09-10 02:47:55.551319', 'step': 4553, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:47:55.605657', 'step': 4553, 'epoch': 2} +{'type': 'loss', 'content': 0.0039528352208435535, 'timestamp': '2025-09-10 02:47:55.608146', 'step': 4554, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:47:55.661503', 'step': 4554, 'epoch': 2} +{'type': 'loss', 'content': 0.00594740454107523, 'timestamp': '2025-09-10 02:47:55.664112', 'step': 4555, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:47:55.717314', 'step': 4555, 'epoch': 2} +{'type': 'loss', 'content': 0.005808423273265362, 'timestamp': '2025-09-10 02:47:55.723678', 'step': 4556, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:47:55.777210', 'step': 4556, 'epoch': 2} +{'type': 'loss', 'content': 0.002037533326074481, 'timestamp': '2025-09-10 02:47:55.779289', 'step': 4557, 'epoch': 2} +{'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 736], 'batch_size': 8, 'flops': 14691612894976}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10859018244352}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 624], 'batch_size': 8, 'flops': 12455932682112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11497784019456}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8623338031488}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8623338031488}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10859018244352}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 656], 'batch_size': 8, 'flops': 13094698457216}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 560], 'batch_size': 8, 'flops': 11178401131904}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11497784019456}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 1168], 'batch_size': 8, 'flops': 23314950858880}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 640], 'batch_size': 8, 'flops': 12775315569664}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [6, 208], 'batch_size': 8, 'flops': 4151977605760}], 'timestamp': '2025-09-10 02:48:12.783083', 'step': 4557, 'epoch': 2} +{'type': 'pplx', 'content': 25457045.702036124, 'timestamp': '2025-09-10 02:48:12.786430', 'step': 4557, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:48:12.841461', 'step': 4557, 'epoch': 2} +{'type': 'loss', 'content': 0.0013372708344832063, 'timestamp': '2025-09-10 02:48:12.843615', 'step': 4558, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:48:12.897776', 'step': 4558, 'epoch': 2} +{'type': 'loss', 'content': 0.0003109157260041684, 'timestamp': '2025-09-10 02:48:12.900285', 'step': 4559, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:48:12.953566', 'step': 4559, 'epoch': 2} +{'type': 'loss', 'content': 0.015070393681526184, 'timestamp': '2025-09-10 02:48:12.960039', 'step': 4560, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:48:13.013580', 'step': 4560, 'epoch': 2} +{'type': 'loss', 'content': 0.005185488611459732, 'timestamp': '2025-09-10 02:48:13.016046', 'step': 4561, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:48:13.069234', 'step': 4561, 'epoch': 2} +{'type': 'loss', 'content': 0.00019084502127952874, 'timestamp': '2025-09-10 02:48:13.072021', 'step': 4562, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:48:13.125914', 'step': 4562, 'epoch': 2} +{'type': 'loss', 'content': 0.0009027626365423203, 'timestamp': '2025-09-10 02:48:13.135561', 'step': 4563, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:48:13.188957', 'step': 4563, 'epoch': 2} +{'type': 'loss', 'content': 0.0008088427712209523, 'timestamp': '2025-09-10 02:48:13.195039', 'step': 4564, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:48:13.247718', 'step': 4564, 'epoch': 2} +{'type': 'loss', 'content': 0.0062088086269795895, 'timestamp': '2025-09-10 02:48:13.254192', 'step': 4565, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:48:13.308462', 'step': 4565, 'epoch': 2} +{'type': 'loss', 'content': 0.0019485211232677102, 'timestamp': '2025-09-10 02:48:13.311277', 'step': 4566, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:48:13.365492', 'step': 4566, 'epoch': 2} +{'type': 'loss', 'content': 0.0016602884279564023, 'timestamp': '2025-09-10 02:48:13.368452', 'step': 4567, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:48:13.421602', 'step': 4567, 'epoch': 2} +{'type': 'loss', 'content': 0.0047590769827365875, 'timestamp': '2025-09-10 02:48:13.427489', 'step': 4568, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:48:13.480485', 'step': 4568, 'epoch': 2} +{'type': 'loss', 'content': 0.0018140958854928613, 'timestamp': '2025-09-10 02:48:13.482995', 'step': 4569, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:48:13.536373', 'step': 4569, 'epoch': 2} +{'type': 'loss', 'content': 0.0002656914002727717, 'timestamp': '2025-09-10 02:48:13.538599', 'step': 4570, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:48:13.592165', 'step': 4570, 'epoch': 2} +{'type': 'loss', 'content': 0.0005687407101504505, 'timestamp': '2025-09-10 02:48:13.594770', 'step': 4571, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:48:13.648266', 'step': 4571, 'epoch': 2} +{'type': 'loss', 'content': 0.03455723077058792, 'timestamp': '2025-09-10 02:48:13.657289', 'step': 4572, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:48:13.710169', 'step': 4572, 'epoch': 2} +{'type': 'loss', 'content': 0.022473115473985672, 'timestamp': '2025-09-10 02:48:13.713135', 'step': 4573, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:48:13.766542', 'step': 4573, 'epoch': 2} +{'type': 'loss', 'content': 0.03626299276947975, 'timestamp': '2025-09-10 02:48:13.769665', 'step': 4574, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:48:13.822892', 'step': 4574, 'epoch': 2} +{'type': 'loss', 'content': 0.009046868421137333, 'timestamp': '2025-09-10 02:48:13.825215', 'step': 4575, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:48:13.878591', 'step': 4575, 'epoch': 2} +{'type': 'loss', 'content': 0.0004490011197049171, 'timestamp': '2025-09-10 02:48:13.884666', 'step': 4576, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:48:13.937358', 'step': 4576, 'epoch': 2} +{'type': 'loss', 'content': 0.014431804418563843, 'timestamp': '2025-09-10 02:48:13.947611', 'step': 4577, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:48:14.002526', 'step': 4577, 'epoch': 2} +{'type': 'loss', 'content': 0.01755792647600174, 'timestamp': '2025-09-10 02:48:14.004836', 'step': 4578, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:48:14.058529', 'step': 4578, 'epoch': 2} +{'type': 'loss', 'content': 0.0009076729184016585, 'timestamp': '2025-09-10 02:48:14.061292', 'step': 4579, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:48:14.114465', 'step': 4579, 'epoch': 2} +{'type': 'loss', 'content': 0.0002662291517481208, 'timestamp': '2025-09-10 02:48:14.120625', 'step': 4580, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:48:14.173975', 'step': 4580, 'epoch': 2} +{'type': 'loss', 'content': 0.0013915960444137454, 'timestamp': '2025-09-10 02:48:14.180639', 'step': 4581, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:48:14.234678', 'step': 4581, 'epoch': 2} +{'type': 'loss', 'content': 0.004539549816399813, 'timestamp': '2025-09-10 02:48:14.241045', 'step': 4582, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:48:14.294925', 'step': 4582, 'epoch': 2} +{'type': 'loss', 'content': 0.00540183624252677, 'timestamp': '2025-09-10 02:48:14.304562', 'step': 4583, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:48:14.358173', 'step': 4583, 'epoch': 2} +{'type': 'loss', 'content': 0.001248289947398007, 'timestamp': '2025-09-10 02:48:14.364415', 'step': 4584, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:48:14.416709', 'step': 4584, 'epoch': 2} +{'type': 'loss', 'content': 0.0003251541347708553, 'timestamp': '2025-09-10 02:48:14.418898', 'step': 4585, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:48:14.473044', 'step': 4585, 'epoch': 2} +{'type': 'loss', 'content': 0.0060363272204995155, 'timestamp': '2025-09-10 02:48:14.475205', 'step': 4586, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:48:14.527786', 'step': 4586, 'epoch': 2} +{'type': 'loss', 'content': 0.0009328167070634663, 'timestamp': '2025-09-10 02:48:14.530051', 'step': 4587, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 480], 'flops': 9600058345344.0}, 'timestamp': '2025-09-10 02:48:14.603351', 'step': 4587, 'epoch': 2} +{'type': 'loss', 'content': 0.024946587160229683, 'timestamp': '2025-09-10 02:48:14.617833', 'step': 4588, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:48:14.671049', 'step': 4588, 'epoch': 2} +{'type': 'loss', 'content': 0.0025679387617856264, 'timestamp': '2025-09-10 02:48:14.677694', 'step': 4589, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:48:14.730941', 'step': 4589, 'epoch': 2} +{'type': 'loss', 'content': 0.016521627083420753, 'timestamp': '2025-09-10 02:48:14.734152', 'step': 4590, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:48:14.787113', 'step': 4590, 'epoch': 2} +{'type': 'loss', 'content': 0.0009985518408939242, 'timestamp': '2025-09-10 02:48:14.789353', 'step': 4591, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:48:14.842617', 'step': 4591, 'epoch': 2} +{'type': 'loss', 'content': 0.0015227339463308454, 'timestamp': '2025-09-10 02:48:14.848595', 'step': 4592, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-09-10 02:48:14.915137', 'step': 4592, 'epoch': 2} +{'type': 'loss', 'content': 0.00046674496843479574, 'timestamp': '2025-09-10 02:48:14.928780', 'step': 4593, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:48:14.982447', 'step': 4593, 'epoch': 2} +{'type': 'loss', 'content': 0.00017164714518003166, 'timestamp': '2025-09-10 02:48:14.984934', 'step': 4594, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:48:15.038909', 'step': 4594, 'epoch': 2} +{'type': 'loss', 'content': 0.0115129379555583, 'timestamp': '2025-09-10 02:48:15.041370', 'step': 4595, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:48:15.094916', 'step': 4595, 'epoch': 2} +{'type': 'loss', 'content': 0.01486434880644083, 'timestamp': '2025-09-10 02:48:15.101132', 'step': 4596, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:48:15.153942', 'step': 4596, 'epoch': 2} +{'type': 'loss', 'content': 0.007304795552045107, 'timestamp': '2025-09-10 02:48:15.161906', 'step': 4597, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:48:15.216088', 'step': 4597, 'epoch': 2} +{'type': 'loss', 'content': 0.001580969081260264, 'timestamp': '2025-09-10 02:48:15.218240', 'step': 4598, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:48:15.276675', 'step': 4598, 'epoch': 2} +{'type': 'loss', 'content': 0.0038990986067801714, 'timestamp': '2025-09-10 02:48:15.287065', 'step': 4599, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:48:15.342447', 'step': 4599, 'epoch': 2} +{'type': 'loss', 'content': 0.045212119817733765, 'timestamp': '2025-09-10 02:48:15.348986', 'step': 4600, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:48:15.402806', 'step': 4600, 'epoch': 2} +{'type': 'loss', 'content': 0.011401698924601078, 'timestamp': '2025-09-10 02:48:15.405241', 'step': 4601, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:48:15.458613', 'step': 4601, 'epoch': 2} +{'type': 'loss', 'content': 0.0001965171832125634, 'timestamp': '2025-09-10 02:48:15.460821', 'step': 4602, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 496], 'flops': 9920060287936.0}, 'timestamp': '2025-09-10 02:48:15.535232', 'step': 4602, 'epoch': 2} +{'type': 'loss', 'content': 0.004868685267865658, 'timestamp': '2025-09-10 02:48:15.549158', 'step': 4603, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:48:15.603224', 'step': 4603, 'epoch': 2} +{'type': 'loss', 'content': 0.001720248837955296, 'timestamp': '2025-09-10 02:48:15.610345', 'step': 4604, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:48:15.663532', 'step': 4604, 'epoch': 2} +{'type': 'loss', 'content': 0.006380091421306133, 'timestamp': '2025-09-10 02:48:15.669676', 'step': 4605, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:48:15.722482', 'step': 4605, 'epoch': 2} +{'type': 'loss', 'content': 0.000943073071539402, 'timestamp': '2025-09-10 02:48:15.724874', 'step': 4606, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:48:15.778402', 'step': 4606, 'epoch': 2} +{'type': 'loss', 'content': 0.018165066838264465, 'timestamp': '2025-09-10 02:48:15.780882', 'step': 4607, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:48:15.834454', 'step': 4607, 'epoch': 2} +{'type': 'loss', 'content': 0.00024254433810710907, 'timestamp': '2025-09-10 02:48:15.840739', 'step': 4608, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-09-10 02:48:15.907229', 'step': 4608, 'epoch': 2} +{'type': 'loss', 'content': 0.0012426173780113459, 'timestamp': '2025-09-10 02:48:15.920808', 'step': 4609, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:48:15.977184', 'step': 4609, 'epoch': 2} +{'type': 'loss', 'content': 0.0008221525349654257, 'timestamp': '2025-09-10 02:48:15.983013', 'step': 4610, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:48:16.037876', 'step': 4610, 'epoch': 2} +{'type': 'loss', 'content': 0.0019179824739694595, 'timestamp': '2025-09-10 02:48:16.040034', 'step': 4611, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:48:16.094342', 'step': 4611, 'epoch': 2} +{'type': 'loss', 'content': 0.0018849697662517428, 'timestamp': '2025-09-10 02:48:16.100518', 'step': 4612, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:48:16.155728', 'step': 4612, 'epoch': 2} +{'type': 'loss', 'content': 0.005814618896692991, 'timestamp': '2025-09-10 02:48:16.158427', 'step': 4613, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-09-10 02:48:16.219164', 'step': 4613, 'epoch': 2} +{'type': 'loss', 'content': 0.006916634738445282, 'timestamp': '2025-09-10 02:48:16.230045', 'step': 4614, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:48:16.283305', 'step': 4614, 'epoch': 2} +{'type': 'loss', 'content': 0.006601746194064617, 'timestamp': '2025-09-10 02:48:16.285473', 'step': 4615, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:48:16.339057', 'step': 4615, 'epoch': 2} +{'type': 'loss', 'content': 0.001805769861675799, 'timestamp': '2025-09-10 02:48:16.345074', 'step': 4616, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:48:16.397654', 'step': 4616, 'epoch': 2} +{'type': 'loss', 'content': 0.0014912709593772888, 'timestamp': '2025-09-10 02:48:16.399492', 'step': 4617, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:48:16.452396', 'step': 4617, 'epoch': 2} +{'type': 'loss', 'content': 0.0016789039364084601, 'timestamp': '2025-09-10 02:48:16.458872', 'step': 4618, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:48:16.512887', 'step': 4618, 'epoch': 2} +{'type': 'loss', 'content': 0.015009819529950619, 'timestamp': '2025-09-10 02:48:16.514944', 'step': 4619, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-09-10 02:48:16.581171', 'step': 4619, 'epoch': 2} +{'type': 'loss', 'content': 0.000642686674837023, 'timestamp': '2025-09-10 02:48:16.594187', 'step': 4620, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:48:16.646979', 'step': 4620, 'epoch': 2} +{'type': 'loss', 'content': 0.0002797394699882716, 'timestamp': '2025-09-10 02:48:16.649028', 'step': 4621, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:48:16.701895', 'step': 4621, 'epoch': 2} +{'type': 'loss', 'content': 0.00236245128326118, 'timestamp': '2025-09-10 02:48:16.710104', 'step': 4622, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:48:16.767951', 'step': 4622, 'epoch': 2} +{'type': 'loss', 'content': 0.0010664359433576465, 'timestamp': '2025-09-10 02:48:16.778323', 'step': 4623, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:48:16.832155', 'step': 4623, 'epoch': 2} +{'type': 'loss', 'content': 0.0032224238384515047, 'timestamp': '2025-09-10 02:48:16.842546', 'step': 4624, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:48:16.895455', 'step': 4624, 'epoch': 2} +{'type': 'loss', 'content': 0.00012770752073265612, 'timestamp': '2025-09-10 02:48:16.897775', 'step': 4625, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:48:16.951123', 'step': 4625, 'epoch': 2} +{'type': 'loss', 'content': 0.0004707627522293478, 'timestamp': '2025-09-10 02:48:16.960739', 'step': 4626, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:48:17.015417', 'step': 4626, 'epoch': 2} +{'type': 'loss', 'content': 0.0031918175518512726, 'timestamp': '2025-09-10 02:48:17.017831', 'step': 4627, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:48:17.075896', 'step': 4627, 'epoch': 2} +{'type': 'loss', 'content': 0.00040983394137583673, 'timestamp': '2025-09-10 02:48:17.087095', 'step': 4628, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:48:17.141273', 'step': 4628, 'epoch': 2} +{'type': 'loss', 'content': 0.00035925681004300714, 'timestamp': '2025-09-10 02:48:17.151804', 'step': 4629, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:48:17.204776', 'step': 4629, 'epoch': 2} +{'type': 'loss', 'content': 0.009273489005863667, 'timestamp': '2025-09-10 02:48:17.206902', 'step': 4630, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:48:17.259553', 'step': 4630, 'epoch': 2} +{'type': 'loss', 'content': 0.00020392243459355086, 'timestamp': '2025-09-10 02:48:17.261664', 'step': 4631, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:48:17.315539', 'step': 4631, 'epoch': 2} +{'type': 'loss', 'content': 0.00037995242746546865, 'timestamp': '2025-09-10 02:48:17.321515', 'step': 4632, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:48:17.373896', 'step': 4632, 'epoch': 2} +{'type': 'loss', 'content': 0.0006583416252397001, 'timestamp': '2025-09-10 02:48:17.375962', 'step': 4633, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:48:17.429133', 'step': 4633, 'epoch': 2} +{'type': 'loss', 'content': 0.005158471409231424, 'timestamp': '2025-09-10 02:48:17.431326', 'step': 4634, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:48:17.484568', 'step': 4634, 'epoch': 2} +{'type': 'loss', 'content': 0.025188740342855453, 'timestamp': '2025-09-10 02:48:17.486448', 'step': 4635, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:48:17.539696', 'step': 4635, 'epoch': 2} +{'type': 'loss', 'content': 0.006703072227537632, 'timestamp': '2025-09-10 02:48:17.545776', 'step': 4636, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:48:17.598134', 'step': 4636, 'epoch': 2} +{'type': 'loss', 'content': 0.0010374571429565549, 'timestamp': '2025-09-10 02:48:17.604558', 'step': 4637, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:48:17.657636', 'step': 4637, 'epoch': 2} +{'type': 'loss', 'content': 0.00020061612303834409, 'timestamp': '2025-09-10 02:48:17.659960', 'step': 4638, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-09-10 02:48:17.721362', 'step': 4638, 'epoch': 2} +{'type': 'loss', 'content': 0.005455473903566599, 'timestamp': '2025-09-10 02:48:17.732474', 'step': 4639, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-09-10 02:48:17.793523', 'step': 4639, 'epoch': 2} +{'type': 'loss', 'content': 0.0005772277945652604, 'timestamp': '2025-09-10 02:48:17.805224', 'step': 4640, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:48:17.857909', 'step': 4640, 'epoch': 2} +{'type': 'loss', 'content': 0.000132946006488055, 'timestamp': '2025-09-10 02:48:17.859997', 'step': 4641, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-09-10 02:48:17.920673', 'step': 4641, 'epoch': 2} +{'type': 'loss', 'content': 0.0029071313329041004, 'timestamp': '2025-09-10 02:48:17.931570', 'step': 4642, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:48:17.984585', 'step': 4642, 'epoch': 2} +{'type': 'loss', 'content': 0.02414119616150856, 'timestamp': '2025-09-10 02:48:17.986756', 'step': 4643, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:48:18.039395', 'step': 4643, 'epoch': 2} +{'type': 'loss', 'content': 0.0009457221603952348, 'timestamp': '2025-09-10 02:48:18.045427', 'step': 4644, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:48:18.097890', 'step': 4644, 'epoch': 2} +{'type': 'loss', 'content': 0.0003954698913730681, 'timestamp': '2025-09-10 02:48:18.099968', 'step': 4645, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:48:18.152777', 'step': 4645, 'epoch': 2} +{'type': 'loss', 'content': 0.03588508442044258, 'timestamp': '2025-09-10 02:48:18.154863', 'step': 4646, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:48:18.207590', 'step': 4646, 'epoch': 2} +{'type': 'loss', 'content': 0.0022157442290335894, 'timestamp': '2025-09-10 02:48:18.209790', 'step': 4647, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:48:18.264206', 'step': 4647, 'epoch': 2} +{'type': 'loss', 'content': 0.0004063074884470552, 'timestamp': '2025-09-10 02:48:18.274799', 'step': 4648, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:48:18.327280', 'step': 4648, 'epoch': 2} +{'type': 'loss', 'content': 0.0003567532985471189, 'timestamp': '2025-09-10 02:48:18.329467', 'step': 4649, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:48:18.382658', 'step': 4649, 'epoch': 2} +{'type': 'loss', 'content': 0.0009528659866191447, 'timestamp': '2025-09-10 02:48:18.384789', 'step': 4650, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:48:18.437897', 'step': 4650, 'epoch': 2} +{'type': 'loss', 'content': 0.01627149060368538, 'timestamp': '2025-09-10 02:48:18.444389', 'step': 4651, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:48:18.497743', 'step': 4651, 'epoch': 2} +{'type': 'loss', 'content': 0.0009323515696451068, 'timestamp': '2025-09-10 02:48:18.505149', 'step': 4652, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:48:18.558549', 'step': 4652, 'epoch': 2} +{'type': 'loss', 'content': 0.0004460910859052092, 'timestamp': '2025-09-10 02:48:18.560882', 'step': 4653, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:48:18.613277', 'step': 4653, 'epoch': 2} +{'type': 'loss', 'content': 0.00522157596424222, 'timestamp': '2025-09-10 02:48:18.615378', 'step': 4654, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:48:18.669237', 'step': 4654, 'epoch': 2} +{'type': 'loss', 'content': 0.0289036575704813, 'timestamp': '2025-09-10 02:48:18.671873', 'step': 4655, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:48:18.724852', 'step': 4655, 'epoch': 2} +{'type': 'loss', 'content': 0.002348503563553095, 'timestamp': '2025-09-10 02:48:18.730606', 'step': 4656, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:48:18.783259', 'step': 4656, 'epoch': 2} +{'type': 'loss', 'content': 0.012411413714289665, 'timestamp': '2025-09-10 02:48:18.791544', 'step': 4657, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:48:18.845196', 'step': 4657, 'epoch': 2} +{'type': 'loss', 'content': 0.00018182327039539814, 'timestamp': '2025-09-10 02:48:18.847437', 'step': 4658, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:48:18.900685', 'step': 4658, 'epoch': 2} +{'type': 'loss', 'content': 0.0004816818982362747, 'timestamp': '2025-09-10 02:48:18.903024', 'step': 4659, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:48:18.956289', 'step': 4659, 'epoch': 2} +{'type': 'loss', 'content': 0.0005066471057944, 'timestamp': '2025-09-10 02:48:18.965176', 'step': 4660, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:48:19.021462', 'step': 4660, 'epoch': 2} +{'type': 'loss', 'content': 0.0009116308647207916, 'timestamp': '2025-09-10 02:48:19.032691', 'step': 4661, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:48:19.086617', 'step': 4661, 'epoch': 2} +{'type': 'loss', 'content': 0.004365415778011084, 'timestamp': '2025-09-10 02:48:19.088768', 'step': 4662, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-09-10 02:48:19.154999', 'step': 4662, 'epoch': 2} +{'type': 'loss', 'content': 0.002365898573771119, 'timestamp': '2025-09-10 02:48:19.167211', 'step': 4663, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:48:19.220232', 'step': 4663, 'epoch': 2} +{'type': 'loss', 'content': 0.0018584148492664099, 'timestamp': '2025-09-10 02:48:19.226112', 'step': 4664, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 480], 'flops': 9600058345344.0}, 'timestamp': '2025-09-10 02:48:19.297487', 'step': 4664, 'epoch': 2} +{'type': 'loss', 'content': 0.016713660210371017, 'timestamp': '2025-09-10 02:48:19.312346', 'step': 4665, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:48:19.365392', 'step': 4665, 'epoch': 2} +{'type': 'loss', 'content': 0.005889121908694506, 'timestamp': '2025-09-10 02:48:19.368182', 'step': 4666, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:48:19.421471', 'step': 4666, 'epoch': 2} +{'type': 'loss', 'content': 0.004000083077698946, 'timestamp': '2025-09-10 02:48:19.423896', 'step': 4667, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:48:19.477187', 'step': 4667, 'epoch': 2} +{'type': 'loss', 'content': 0.011162602342665195, 'timestamp': '2025-09-10 02:48:19.483272', 'step': 4668, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:48:19.535333', 'step': 4668, 'epoch': 2} +{'type': 'loss', 'content': 0.0012390923220664263, 'timestamp': '2025-09-10 02:48:19.537946', 'step': 4669, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:48:19.590800', 'step': 4669, 'epoch': 2} +{'type': 'loss', 'content': 0.01437478419393301, 'timestamp': '2025-09-10 02:48:19.594610', 'step': 4670, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:48:19.650163', 'step': 4670, 'epoch': 2} +{'type': 'loss', 'content': 0.00072527612792328, 'timestamp': '2025-09-10 02:48:19.658289', 'step': 4671, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:48:19.711292', 'step': 4671, 'epoch': 2} +{'type': 'loss', 'content': 0.011226385831832886, 'timestamp': '2025-09-10 02:48:19.718713', 'step': 4672, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:48:19.771107', 'step': 4672, 'epoch': 2} +{'type': 'loss', 'content': 0.0017921874532476068, 'timestamp': '2025-09-10 02:48:19.773790', 'step': 4673, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:48:19.826000', 'step': 4673, 'epoch': 2} +{'type': 'loss', 'content': 0.002091957489028573, 'timestamp': '2025-09-10 02:48:19.828295', 'step': 4674, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:48:19.881403', 'step': 4674, 'epoch': 2} +{'type': 'loss', 'content': 0.0037636582273989916, 'timestamp': '2025-09-10 02:48:19.883590', 'step': 4675, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:48:19.938231', 'step': 4675, 'epoch': 2} +{'type': 'loss', 'content': 0.002588507952168584, 'timestamp': '2025-09-10 02:48:19.948843', 'step': 4676, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:48:20.001702', 'step': 4676, 'epoch': 2} +{'type': 'loss', 'content': 0.01437581516802311, 'timestamp': '2025-09-10 02:48:20.003893', 'step': 4677, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:48:20.056603', 'step': 4677, 'epoch': 2} +{'type': 'loss', 'content': 0.011934679932892323, 'timestamp': '2025-09-10 02:48:20.058674', 'step': 4678, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:48:20.118779', 'step': 4678, 'epoch': 2} +{'type': 'loss', 'content': 0.010685628280043602, 'timestamp': '2025-09-10 02:48:20.129538', 'step': 4679, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:48:20.190072', 'step': 4679, 'epoch': 2} +{'type': 'loss', 'content': 0.0011842173989862204, 'timestamp': '2025-09-10 02:48:20.201578', 'step': 4680, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:48:20.254373', 'step': 4680, 'epoch': 2} +{'type': 'loss', 'content': 0.0002382299571763724, 'timestamp': '2025-09-10 02:48:20.256678', 'step': 4681, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:48:20.309801', 'step': 4681, 'epoch': 2} +{'type': 'loss', 'content': 0.007638778071850538, 'timestamp': '2025-09-10 02:48:20.318192', 'step': 4682, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:48:20.371375', 'step': 4682, 'epoch': 2} +{'type': 'loss', 'content': 0.0008221337338909507, 'timestamp': '2025-09-10 02:48:20.377835', 'step': 4683, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-09-10 02:48:20.440046', 'step': 4683, 'epoch': 2} +{'type': 'loss', 'content': 0.001319772214628756, 'timestamp': '2025-09-10 02:48:20.451903', 'step': 4684, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 464], 'flops': 9280056402752.0}, 'timestamp': '2025-09-10 02:48:20.523253', 'step': 4684, 'epoch': 2} +{'type': 'loss', 'content': 0.0011567100882530212, 'timestamp': '2025-09-10 02:48:20.537856', 'step': 4685, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:48:20.591333', 'step': 4685, 'epoch': 2} +{'type': 'loss', 'content': 0.0013520864304155111, 'timestamp': '2025-09-10 02:48:20.593695', 'step': 4686, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:48:20.646258', 'step': 4686, 'epoch': 2} +{'type': 'loss', 'content': 0.019370978698134422, 'timestamp': '2025-09-10 02:48:20.648643', 'step': 4687, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:48:20.706814', 'step': 4687, 'epoch': 2} +{'type': 'loss', 'content': 0.00015971229004207999, 'timestamp': '2025-09-10 02:48:20.718001', 'step': 4688, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:48:20.770816', 'step': 4688, 'epoch': 2} +{'type': 'loss', 'content': 0.0033498455304652452, 'timestamp': '2025-09-10 02:48:20.777381', 'step': 4689, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:48:20.830537', 'step': 4689, 'epoch': 2} +{'type': 'loss', 'content': 0.008158961310982704, 'timestamp': '2025-09-10 02:48:20.832545', 'step': 4690, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:48:20.886314', 'step': 4690, 'epoch': 2} +{'type': 'loss', 'content': 0.00515906885266304, 'timestamp': '2025-09-10 02:48:20.895930', 'step': 4691, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:48:20.951705', 'step': 4691, 'epoch': 2} +{'type': 'loss', 'content': 0.0014080966357141733, 'timestamp': '2025-09-10 02:48:20.957417', 'step': 4692, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:48:21.009766', 'step': 4692, 'epoch': 2} +{'type': 'loss', 'content': 0.003290967782959342, 'timestamp': '2025-09-10 02:48:21.011805', 'step': 4693, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:48:21.064731', 'step': 4693, 'epoch': 2} +{'type': 'loss', 'content': 0.00014995354285929352, 'timestamp': '2025-09-10 02:48:21.066872', 'step': 4694, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 464], 'flops': 9280056402752.0}, 'timestamp': '2025-09-10 02:48:21.139643', 'step': 4694, 'epoch': 2} +{'type': 'loss', 'content': 0.011130459606647491, 'timestamp': '2025-09-10 02:48:21.153137', 'step': 4695, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:48:21.206891', 'step': 4695, 'epoch': 2} +{'type': 'loss', 'content': 0.00019666498701553792, 'timestamp': '2025-09-10 02:48:21.212765', 'step': 4696, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:48:21.265304', 'step': 4696, 'epoch': 2} +{'type': 'loss', 'content': 0.002782369265332818, 'timestamp': '2025-09-10 02:48:21.267426', 'step': 4697, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:48:21.325709', 'step': 4697, 'epoch': 2} +{'type': 'loss', 'content': 0.003994309343397617, 'timestamp': '2025-09-10 02:48:21.336151', 'step': 4698, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 448], 'flops': 8960054460160.0}, 'timestamp': '2025-09-10 02:48:21.406141', 'step': 4698, 'epoch': 2} +{'type': 'loss', 'content': 0.0012797446688637137, 'timestamp': '2025-09-10 02:48:21.419021', 'step': 4699, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:48:21.472635', 'step': 4699, 'epoch': 2} +{'type': 'loss', 'content': 0.0034619185607880354, 'timestamp': '2025-09-10 02:48:21.478307', 'step': 4700, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:48:21.533445', 'step': 4700, 'epoch': 2} +{'type': 'loss', 'content': 0.0008053782512433827, 'timestamp': '2025-09-10 02:48:21.536448', 'step': 4701, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:48:21.588821', 'step': 4701, 'epoch': 2} +{'type': 'loss', 'content': 0.00028124029631726444, 'timestamp': '2025-09-10 02:48:21.591950', 'step': 4702, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:48:21.645124', 'step': 4702, 'epoch': 2} +{'type': 'loss', 'content': 0.019142018631100655, 'timestamp': '2025-09-10 02:48:21.647362', 'step': 4703, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:48:21.700464', 'step': 4703, 'epoch': 2} +{'type': 'loss', 'content': 0.001146473572589457, 'timestamp': '2025-09-10 02:48:21.706252', 'step': 4704, 'epoch': 2} +{'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 736], 'batch_size': 8, 'flops': 14691612894976}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10859018244352}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 624], 'batch_size': 8, 'flops': 12455932682112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11497784019456}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8623338031488}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8623338031488}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10859018244352}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 656], 'batch_size': 8, 'flops': 13094698457216}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 560], 'batch_size': 8, 'flops': 11178401131904}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11497784019456}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 1168], 'batch_size': 8, 'flops': 23314950858880}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 640], 'batch_size': 8, 'flops': 12775315569664}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [6, 208], 'batch_size': 8, 'flops': 4151977605760}], 'timestamp': '2025-09-10 02:48:38.513132', 'step': 4704, 'epoch': 2} +{'type': 'pplx', 'content': 26641143.515721954, 'timestamp': '2025-09-10 02:48:38.515988', 'step': 4704, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:48:38.570906', 'step': 4704, 'epoch': 2} +{'type': 'loss', 'content': 0.0001589340390637517, 'timestamp': '2025-09-10 02:48:38.575623', 'step': 4705, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:48:38.630044', 'step': 4705, 'epoch': 2} +{'type': 'loss', 'content': 0.0007871562265790999, 'timestamp': '2025-09-10 02:48:38.632106', 'step': 4706, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:48:38.692299', 'step': 4706, 'epoch': 2} +{'type': 'loss', 'content': 0.006433350499719381, 'timestamp': '2025-09-10 02:48:38.702963', 'step': 4707, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:48:38.756896', 'step': 4707, 'epoch': 2} +{'type': 'loss', 'content': 0.000366387510439381, 'timestamp': '2025-09-10 02:48:38.763217', 'step': 4708, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 656], 'flops': 13120079713856.0}, 'timestamp': '2025-09-10 02:48:38.857429', 'step': 4708, 'epoch': 2} +{'type': 'loss', 'content': 0.0033369511365890503, 'timestamp': '2025-09-10 02:48:38.877658', 'step': 4709, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:48:38.932360', 'step': 4709, 'epoch': 2} +{'type': 'loss', 'content': 0.001281169825233519, 'timestamp': '2025-09-10 02:48:38.938927', 'step': 4710, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:48:38.993895', 'step': 4710, 'epoch': 2} +{'type': 'loss', 'content': 0.0014966449234634638, 'timestamp': '2025-09-10 02:48:38.995985', 'step': 4711, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:48:39.049666', 'step': 4711, 'epoch': 2} +{'type': 'loss', 'content': 0.005338889081031084, 'timestamp': '2025-09-10 02:48:39.055782', 'step': 4712, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 528], 'flops': 10560064173120.0}, 'timestamp': '2025-09-10 02:48:39.134232', 'step': 4712, 'epoch': 2} +{'type': 'loss', 'content': 0.0003673503815662116, 'timestamp': '2025-09-10 02:48:39.150677', 'step': 4713, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [3, 224], 'flops': 3360020475552.0}, 'timestamp': '2025-09-10 02:48:39.205986', 'step': 4713, 'epoch': 2} +{'type': 'loss', 'content': 0.00020906708959955722, 'timestamp': '2025-09-10 02:48:39.207936', 'step': 4714, 'epoch': 2} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:48:39.264899', 'step': 4714, 'epoch': 3} +{'type': 'loss', 'content': 0.00016857580340001732, 'timestamp': '2025-09-10 02:48:39.267867', 'step': 4715, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:48:39.320394', 'step': 4715, 'epoch': 3} +{'type': 'loss', 'content': 0.0032327771186828613, 'timestamp': '2025-09-10 02:48:39.326125', 'step': 4716, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:48:39.378801', 'step': 4716, 'epoch': 3} +{'type': 'loss', 'content': 0.03402525931596756, 'timestamp': '2025-09-10 02:48:39.380921', 'step': 4717, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:48:39.434058', 'step': 4717, 'epoch': 3} +{'type': 'loss', 'content': 0.00011595349496928975, 'timestamp': '2025-09-10 02:48:39.436176', 'step': 4718, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:48:39.489723', 'step': 4718, 'epoch': 3} +{'type': 'loss', 'content': 5.1871651521651074e-05, 'timestamp': '2025-09-10 02:48:39.491662', 'step': 4719, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:48:39.544442', 'step': 4719, 'epoch': 3} +{'type': 'loss', 'content': 0.013515396043658257, 'timestamp': '2025-09-10 02:48:39.550348', 'step': 4720, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:48:39.602639', 'step': 4720, 'epoch': 3} +{'type': 'loss', 'content': 0.05121804028749466, 'timestamp': '2025-09-10 02:48:39.605014', 'step': 4721, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:48:39.658105', 'step': 4721, 'epoch': 3} +{'type': 'loss', 'content': 0.01933814212679863, 'timestamp': '2025-09-10 02:48:39.660338', 'step': 4722, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:48:39.713685', 'step': 4722, 'epoch': 3} +{'type': 'loss', 'content': 0.010275743901729584, 'timestamp': '2025-09-10 02:48:39.719858', 'step': 4723, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:48:39.774461', 'step': 4723, 'epoch': 3} +{'type': 'loss', 'content': 0.0006337855011224747, 'timestamp': '2025-09-10 02:48:39.780467', 'step': 4724, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:48:39.833427', 'step': 4724, 'epoch': 3} +{'type': 'loss', 'content': 0.01716405712068081, 'timestamp': '2025-09-10 02:48:39.835423', 'step': 4725, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:48:39.888207', 'step': 4725, 'epoch': 3} +{'type': 'loss', 'content': 0.015138440765440464, 'timestamp': '2025-09-10 02:48:39.890371', 'step': 4726, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:48:39.943586', 'step': 4726, 'epoch': 3} +{'type': 'loss', 'content': 0.00028167726122774184, 'timestamp': '2025-09-10 02:48:39.945666', 'step': 4727, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:48:40.003293', 'step': 4727, 'epoch': 3} +{'type': 'loss', 'content': 0.00019934328156523407, 'timestamp': '2025-09-10 02:48:40.014504', 'step': 4728, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:48:40.067476', 'step': 4728, 'epoch': 3} +{'type': 'loss', 'content': 0.00015965728380251676, 'timestamp': '2025-09-10 02:48:40.069652', 'step': 4729, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:48:40.122719', 'step': 4729, 'epoch': 3} +{'type': 'loss', 'content': 9.707252320367843e-05, 'timestamp': '2025-09-10 02:48:40.124905', 'step': 4730, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:48:40.178909', 'step': 4730, 'epoch': 3} +{'type': 'loss', 'content': 0.008025681599974632, 'timestamp': '2025-09-10 02:48:40.188479', 'step': 4731, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:48:40.241130', 'step': 4731, 'epoch': 3} +{'type': 'loss', 'content': 0.0008100520935840905, 'timestamp': '2025-09-10 02:48:40.246942', 'step': 4732, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:48:40.299419', 'step': 4732, 'epoch': 3} +{'type': 'loss', 'content': 0.005326542071998119, 'timestamp': '2025-09-10 02:48:40.305849', 'step': 4733, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:48:40.358973', 'step': 4733, 'epoch': 3} +{'type': 'loss', 'content': 0.0006007250631228089, 'timestamp': '2025-09-10 02:48:40.361107', 'step': 4734, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:48:40.414473', 'step': 4734, 'epoch': 3} +{'type': 'loss', 'content': 0.01471441239118576, 'timestamp': '2025-09-10 02:48:40.420575', 'step': 4735, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:48:40.475641', 'step': 4735, 'epoch': 3} +{'type': 'loss', 'content': 0.00024760139058344066, 'timestamp': '2025-09-10 02:48:40.481652', 'step': 4736, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:48:40.534767', 'step': 4736, 'epoch': 3} +{'type': 'loss', 'content': 0.0013098502531647682, 'timestamp': '2025-09-10 02:48:40.537024', 'step': 4737, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:48:40.590018', 'step': 4737, 'epoch': 3} +{'type': 'loss', 'content': 0.0006232666200958192, 'timestamp': '2025-09-10 02:48:40.592019', 'step': 4738, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:48:40.645300', 'step': 4738, 'epoch': 3} +{'type': 'loss', 'content': 0.004891558084636927, 'timestamp': '2025-09-10 02:48:40.647507', 'step': 4739, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:48:40.700569', 'step': 4739, 'epoch': 3} +{'type': 'loss', 'content': 0.00015102754696272314, 'timestamp': '2025-09-10 02:48:40.706721', 'step': 4740, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:48:40.759339', 'step': 4740, 'epoch': 3} +{'type': 'loss', 'content': 0.001452394062653184, 'timestamp': '2025-09-10 02:48:40.761392', 'step': 4741, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:48:40.814396', 'step': 4741, 'epoch': 3} +{'type': 'loss', 'content': 0.0003562500060070306, 'timestamp': '2025-09-10 02:48:40.816627', 'step': 4742, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:48:40.870131', 'step': 4742, 'epoch': 3} +{'type': 'loss', 'content': 0.06023650988936424, 'timestamp': '2025-09-10 02:48:40.872446', 'step': 4743, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:48:40.932716', 'step': 4743, 'epoch': 3} +{'type': 'loss', 'content': 7.696308603044599e-05, 'timestamp': '2025-09-10 02:48:40.944199', 'step': 4744, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:48:40.997185', 'step': 4744, 'epoch': 3} +{'type': 'loss', 'content': 0.0001707561023067683, 'timestamp': '2025-09-10 02:48:40.999383', 'step': 4745, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:48:41.052586', 'step': 4745, 'epoch': 3} +{'type': 'loss', 'content': 0.002241963054984808, 'timestamp': '2025-09-10 02:48:41.054886', 'step': 4746, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:48:41.107942', 'step': 4746, 'epoch': 3} +{'type': 'loss', 'content': 0.00402917992323637, 'timestamp': '2025-09-10 02:48:41.110134', 'step': 4747, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:48:41.162795', 'step': 4747, 'epoch': 3} +{'type': 'loss', 'content': 0.023892248049378395, 'timestamp': '2025-09-10 02:48:41.168730', 'step': 4748, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:48:41.221394', 'step': 4748, 'epoch': 3} +{'type': 'loss', 'content': 0.0004089613794349134, 'timestamp': '2025-09-10 02:48:41.227304', 'step': 4749, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:48:41.280072', 'step': 4749, 'epoch': 3} +{'type': 'loss', 'content': 0.010775496251881123, 'timestamp': '2025-09-10 02:48:41.282379', 'step': 4750, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:48:41.335293', 'step': 4750, 'epoch': 3} +{'type': 'loss', 'content': 0.00013157210196368396, 'timestamp': '2025-09-10 02:48:41.337692', 'step': 4751, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:48:41.391002', 'step': 4751, 'epoch': 3} +{'type': 'loss', 'content': 0.0024290860164910555, 'timestamp': '2025-09-10 02:48:41.396782', 'step': 4752, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:48:41.449340', 'step': 4752, 'epoch': 3} +{'type': 'loss', 'content': 0.014459015801548958, 'timestamp': '2025-09-10 02:48:41.451467', 'step': 4753, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:48:41.504000', 'step': 4753, 'epoch': 3} +{'type': 'loss', 'content': 0.003907266538590193, 'timestamp': '2025-09-10 02:48:41.506067', 'step': 4754, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:48:41.559211', 'step': 4754, 'epoch': 3} +{'type': 'loss', 'content': 0.007426393683999777, 'timestamp': '2025-09-10 02:48:41.561377', 'step': 4755, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:48:41.614304', 'step': 4755, 'epoch': 3} +{'type': 'loss', 'content': 0.0004347166686784476, 'timestamp': '2025-09-10 02:48:41.620532', 'step': 4756, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:48:41.677486', 'step': 4756, 'epoch': 3} +{'type': 'loss', 'content': 0.002476779278367758, 'timestamp': '2025-09-10 02:48:41.688646', 'step': 4757, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:48:41.742612', 'step': 4757, 'epoch': 3} +{'type': 'loss', 'content': 0.00012231871369294822, 'timestamp': '2025-09-10 02:48:41.744689', 'step': 4758, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:48:41.798254', 'step': 4758, 'epoch': 3} +{'type': 'loss', 'content': 0.04061596468091011, 'timestamp': '2025-09-10 02:48:41.800371', 'step': 4759, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:48:41.854046', 'step': 4759, 'epoch': 3} +{'type': 'loss', 'content': 0.006013612262904644, 'timestamp': '2025-09-10 02:48:41.859798', 'step': 4760, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:48:41.912840', 'step': 4760, 'epoch': 3} +{'type': 'loss', 'content': 0.0035146118607372046, 'timestamp': '2025-09-10 02:48:41.919208', 'step': 4761, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:48:41.972288', 'step': 4761, 'epoch': 3} +{'type': 'loss', 'content': 0.0005703885108232498, 'timestamp': '2025-09-10 02:48:41.974351', 'step': 4762, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:48:42.026976', 'step': 4762, 'epoch': 3} +{'type': 'loss', 'content': 0.0011439550435170531, 'timestamp': '2025-09-10 02:48:42.035067', 'step': 4763, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:48:42.088533', 'step': 4763, 'epoch': 3} +{'type': 'loss', 'content': 0.025727814063429832, 'timestamp': '2025-09-10 02:48:42.097406', 'step': 4764, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:48:42.150715', 'step': 4764, 'epoch': 3} +{'type': 'loss', 'content': 0.00408810842782259, 'timestamp': '2025-09-10 02:48:42.153030', 'step': 4765, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 928], 'flops': 18560112737920.0}, 'timestamp': '2025-09-10 02:48:42.284640', 'step': 4765, 'epoch': 3} +{'type': 'loss', 'content': 0.004506801720708609, 'timestamp': '2025-09-10 02:48:42.310447', 'step': 4766, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:48:42.363972', 'step': 4766, 'epoch': 3} +{'type': 'loss', 'content': 0.010416925884783268, 'timestamp': '2025-09-10 02:48:42.366035', 'step': 4767, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:48:42.419009', 'step': 4767, 'epoch': 3} +{'type': 'loss', 'content': 0.001127948984503746, 'timestamp': '2025-09-10 02:48:42.426244', 'step': 4768, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:48:42.479884', 'step': 4768, 'epoch': 3} +{'type': 'loss', 'content': 0.012796514667570591, 'timestamp': '2025-09-10 02:48:42.482024', 'step': 4769, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-09-10 02:48:42.548201', 'step': 4769, 'epoch': 3} +{'type': 'loss', 'content': 0.00016542102093808353, 'timestamp': '2025-09-10 02:48:42.560393', 'step': 4770, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:48:42.613267', 'step': 4770, 'epoch': 3} +{'type': 'loss', 'content': 0.0020784097723662853, 'timestamp': '2025-09-10 02:48:42.615556', 'step': 4771, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:48:42.668980', 'step': 4771, 'epoch': 3} +{'type': 'loss', 'content': 0.010669670067727566, 'timestamp': '2025-09-10 02:48:42.674674', 'step': 4772, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:48:42.727133', 'step': 4772, 'epoch': 3} +{'type': 'loss', 'content': 0.004837530665099621, 'timestamp': '2025-09-10 02:48:42.733482', 'step': 4773, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:48:42.786881', 'step': 4773, 'epoch': 3} +{'type': 'loss', 'content': 0.0010128377471119165, 'timestamp': '2025-09-10 02:48:42.789885', 'step': 4774, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:48:42.843563', 'step': 4774, 'epoch': 3} +{'type': 'loss', 'content': 0.03209507092833519, 'timestamp': '2025-09-10 02:48:42.853196', 'step': 4775, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:48:42.906169', 'step': 4775, 'epoch': 3} +{'type': 'loss', 'content': 0.008237884379923344, 'timestamp': '2025-09-10 02:48:42.911991', 'step': 4776, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:48:42.964468', 'step': 4776, 'epoch': 3} +{'type': 'loss', 'content': 0.0006681117229163647, 'timestamp': '2025-09-10 02:48:42.967455', 'step': 4777, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:48:43.020293', 'step': 4777, 'epoch': 3} +{'type': 'loss', 'content': 0.0007087221019901335, 'timestamp': '2025-09-10 02:48:43.022491', 'step': 4778, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-09-10 02:48:43.091403', 'step': 4778, 'epoch': 3} +{'type': 'loss', 'content': 0.0005340483039617538, 'timestamp': '2025-09-10 02:48:43.103976', 'step': 4779, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:48:43.156963', 'step': 4779, 'epoch': 3} +{'type': 'loss', 'content': 0.003317827358841896, 'timestamp': '2025-09-10 02:48:43.162744', 'step': 4780, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:48:43.215071', 'step': 4780, 'epoch': 3} +{'type': 'loss', 'content': 0.00177658477332443, 'timestamp': '2025-09-10 02:48:43.217339', 'step': 4781, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:48:43.270264', 'step': 4781, 'epoch': 3} +{'type': 'loss', 'content': 0.03338702768087387, 'timestamp': '2025-09-10 02:48:43.272567', 'step': 4782, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:48:43.325768', 'step': 4782, 'epoch': 3} +{'type': 'loss', 'content': 0.010354549624025822, 'timestamp': '2025-09-10 02:48:43.328690', 'step': 4783, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-09-10 02:48:43.397582', 'step': 4783, 'epoch': 3} +{'type': 'loss', 'content': 0.0037773135118186474, 'timestamp': '2025-09-10 02:48:43.410929', 'step': 4784, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:48:43.463686', 'step': 4784, 'epoch': 3} +{'type': 'loss', 'content': 0.000469871360110119, 'timestamp': '2025-09-10 02:48:43.465777', 'step': 4785, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:48:43.520547', 'step': 4785, 'epoch': 3} +{'type': 'loss', 'content': 0.008968977257609367, 'timestamp': '2025-09-10 02:48:43.530295', 'step': 4786, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:48:43.583455', 'step': 4786, 'epoch': 3} +{'type': 'loss', 'content': 0.035639386624097824, 'timestamp': '2025-09-10 02:48:43.585607', 'step': 4787, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:48:43.639006', 'step': 4787, 'epoch': 3} +{'type': 'loss', 'content': 0.0011272707488387823, 'timestamp': '2025-09-10 02:48:43.644729', 'step': 4788, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:48:43.697313', 'step': 4788, 'epoch': 3} +{'type': 'loss', 'content': 0.010113457217812538, 'timestamp': '2025-09-10 02:48:43.705439', 'step': 4789, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-09-10 02:48:43.772010', 'step': 4789, 'epoch': 3} +{'type': 'loss', 'content': 0.0008517818641848862, 'timestamp': '2025-09-10 02:48:43.784231', 'step': 4790, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:48:43.837947', 'step': 4790, 'epoch': 3} +{'type': 'loss', 'content': 0.006375905591994524, 'timestamp': '2025-09-10 02:48:43.840061', 'step': 4791, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:48:43.892767', 'step': 4791, 'epoch': 3} +{'type': 'loss', 'content': 0.003986644558608532, 'timestamp': '2025-09-10 02:48:43.898724', 'step': 4792, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:48:43.955072', 'step': 4792, 'epoch': 3} +{'type': 'loss', 'content': 0.004787076264619827, 'timestamp': '2025-09-10 02:48:43.966260', 'step': 4793, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-09-10 02:48:44.032554', 'step': 4793, 'epoch': 3} +{'type': 'loss', 'content': 0.003805638989433646, 'timestamp': '2025-09-10 02:48:44.044754', 'step': 4794, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:48:44.098384', 'step': 4794, 'epoch': 3} +{'type': 'loss', 'content': 0.0030001818668097258, 'timestamp': '2025-09-10 02:48:44.100607', 'step': 4795, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:48:44.154296', 'step': 4795, 'epoch': 3} +{'type': 'loss', 'content': 0.013813378289341927, 'timestamp': '2025-09-10 02:48:44.164711', 'step': 4796, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:48:44.217660', 'step': 4796, 'epoch': 3} +{'type': 'loss', 'content': 0.0305621474981308, 'timestamp': '2025-09-10 02:48:44.224116', 'step': 4797, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:48:44.277105', 'step': 4797, 'epoch': 3} +{'type': 'loss', 'content': 0.004716162104159594, 'timestamp': '2025-09-10 02:48:44.279603', 'step': 4798, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:48:44.332918', 'step': 4798, 'epoch': 3} +{'type': 'loss', 'content': 0.004502739757299423, 'timestamp': '2025-09-10 02:48:44.335169', 'step': 4799, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:48:44.396447', 'step': 4799, 'epoch': 3} +{'type': 'loss', 'content': 0.013405581936240196, 'timestamp': '2025-09-10 02:48:44.407837', 'step': 4800, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:48:44.461533', 'step': 4800, 'epoch': 3} +{'type': 'loss', 'content': 0.004024644382297993, 'timestamp': '2025-09-10 02:48:44.463595', 'step': 4801, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:48:44.517223', 'step': 4801, 'epoch': 3} +{'type': 'loss', 'content': 0.0016606238204985857, 'timestamp': '2025-09-10 02:48:44.519464', 'step': 4802, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 496], 'flops': 9920060287936.0}, 'timestamp': '2025-09-10 02:48:44.594682', 'step': 4802, 'epoch': 3} +{'type': 'loss', 'content': 0.006496304180473089, 'timestamp': '2025-09-10 02:48:44.608584', 'step': 4803, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:48:44.662108', 'step': 4803, 'epoch': 3} +{'type': 'loss', 'content': 0.014956875704228878, 'timestamp': '2025-09-10 02:48:44.667994', 'step': 4804, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:48:44.720593', 'step': 4804, 'epoch': 3} +{'type': 'loss', 'content': 0.0029476068448275328, 'timestamp': '2025-09-10 02:48:44.722551', 'step': 4805, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:48:44.780268', 'step': 4805, 'epoch': 3} +{'type': 'loss', 'content': 0.0006885406328365207, 'timestamp': '2025-09-10 02:48:44.790778', 'step': 4806, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:48:44.844346', 'step': 4806, 'epoch': 3} +{'type': 'loss', 'content': 0.019481521099805832, 'timestamp': '2025-09-10 02:48:44.846498', 'step': 4807, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:48:44.899681', 'step': 4807, 'epoch': 3} +{'type': 'loss', 'content': 0.0005319296615198255, 'timestamp': '2025-09-10 02:48:44.905422', 'step': 4808, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 480], 'flops': 9600058345344.0}, 'timestamp': '2025-09-10 02:48:44.977358', 'step': 4808, 'epoch': 3} +{'type': 'loss', 'content': 0.003924211021512747, 'timestamp': '2025-09-10 02:48:44.992279', 'step': 4809, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:48:45.045418', 'step': 4809, 'epoch': 3} +{'type': 'loss', 'content': 0.008448751643300056, 'timestamp': '2025-09-10 02:48:45.047512', 'step': 4810, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:48:45.100544', 'step': 4810, 'epoch': 3} +{'type': 'loss', 'content': 0.014836247079074383, 'timestamp': '2025-09-10 02:48:45.103671', 'step': 4811, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:48:45.156795', 'step': 4811, 'epoch': 3} +{'type': 'loss', 'content': 0.0006818660767748952, 'timestamp': '2025-09-10 02:48:45.162477', 'step': 4812, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:48:45.215072', 'step': 4812, 'epoch': 3} +{'type': 'loss', 'content': 0.0029938272200524807, 'timestamp': '2025-09-10 02:48:45.216999', 'step': 4813, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:48:45.270379', 'step': 4813, 'epoch': 3} +{'type': 'loss', 'content': 0.004919454921036959, 'timestamp': '2025-09-10 02:48:45.272307', 'step': 4814, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:48:45.325671', 'step': 4814, 'epoch': 3} +{'type': 'loss', 'content': 0.012119380757212639, 'timestamp': '2025-09-10 02:48:45.327728', 'step': 4815, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:48:45.380672', 'step': 4815, 'epoch': 3} +{'type': 'loss', 'content': 0.0178728885948658, 'timestamp': '2025-09-10 02:48:45.386766', 'step': 4816, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:48:45.439324', 'step': 4816, 'epoch': 3} +{'type': 'loss', 'content': 0.025599464774131775, 'timestamp': '2025-09-10 02:48:45.441909', 'step': 4817, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:48:45.495879', 'step': 4817, 'epoch': 3} +{'type': 'loss', 'content': 0.0018236581236124039, 'timestamp': '2025-09-10 02:48:45.505326', 'step': 4818, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:48:45.560401', 'step': 4818, 'epoch': 3} +{'type': 'loss', 'content': 0.0005904998979531229, 'timestamp': '2025-09-10 02:48:45.566948', 'step': 4819, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:48:45.623007', 'step': 4819, 'epoch': 3} +{'type': 'loss', 'content': 0.0014317615423351526, 'timestamp': '2025-09-10 02:48:45.630938', 'step': 4820, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:48:45.687356', 'step': 4820, 'epoch': 3} +{'type': 'loss', 'content': 0.009754039347171783, 'timestamp': '2025-09-10 02:48:45.689500', 'step': 4821, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:48:45.744222', 'step': 4821, 'epoch': 3} +{'type': 'loss', 'content': 0.008487998507916927, 'timestamp': '2025-09-10 02:48:45.746330', 'step': 4822, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:48:45.800206', 'step': 4822, 'epoch': 3} +{'type': 'loss', 'content': 0.020899279043078423, 'timestamp': '2025-09-10 02:48:45.805770', 'step': 4823, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:48:45.859906', 'step': 4823, 'epoch': 3} +{'type': 'loss', 'content': 0.0005726119852624834, 'timestamp': '2025-09-10 02:48:45.866380', 'step': 4824, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:48:45.919037', 'step': 4824, 'epoch': 3} +{'type': 'loss', 'content': 0.001260301098227501, 'timestamp': '2025-09-10 02:48:45.929480', 'step': 4825, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:48:45.983276', 'step': 4825, 'epoch': 3} +{'type': 'loss', 'content': 0.0008040931425057352, 'timestamp': '2025-09-10 02:48:45.985599', 'step': 4826, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:48:46.039115', 'step': 4826, 'epoch': 3} +{'type': 'loss', 'content': 0.002123631536960602, 'timestamp': '2025-09-10 02:48:46.041009', 'step': 4827, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:48:46.093781', 'step': 4827, 'epoch': 3} +{'type': 'loss', 'content': 0.0033926458563655615, 'timestamp': '2025-09-10 02:48:46.099769', 'step': 4828, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:48:46.152405', 'step': 4828, 'epoch': 3} +{'type': 'loss', 'content': 0.02737729251384735, 'timestamp': '2025-09-10 02:48:46.154637', 'step': 4829, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:48:46.207606', 'step': 4829, 'epoch': 3} +{'type': 'loss', 'content': 0.0016665494767948985, 'timestamp': '2025-09-10 02:48:46.209917', 'step': 4830, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:48:46.268398', 'step': 4830, 'epoch': 3} +{'type': 'loss', 'content': 0.0068314154632389545, 'timestamp': '2025-09-10 02:48:46.278827', 'step': 4831, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:48:46.332506', 'step': 4831, 'epoch': 3} +{'type': 'loss', 'content': 0.024365218356251717, 'timestamp': '2025-09-10 02:48:46.338584', 'step': 4832, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:48:46.392005', 'step': 4832, 'epoch': 3} +{'type': 'loss', 'content': 0.02425851859152317, 'timestamp': '2025-09-10 02:48:46.402425', 'step': 4833, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:48:46.460371', 'step': 4833, 'epoch': 3} +{'type': 'loss', 'content': 0.0010536868358030915, 'timestamp': '2025-09-10 02:48:46.470770', 'step': 4834, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:48:46.524530', 'step': 4834, 'epoch': 3} +{'type': 'loss', 'content': 0.006592122372239828, 'timestamp': '2025-09-10 02:48:46.534163', 'step': 4835, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:48:46.587279', 'step': 4835, 'epoch': 3} +{'type': 'loss', 'content': 0.0006792788044549525, 'timestamp': '2025-09-10 02:48:46.594377', 'step': 4836, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:48:46.647217', 'step': 4836, 'epoch': 3} +{'type': 'loss', 'content': 0.01451940555125475, 'timestamp': '2025-09-10 02:48:46.649648', 'step': 4837, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:48:46.703048', 'step': 4837, 'epoch': 3} +{'type': 'loss', 'content': 0.004953315947204828, 'timestamp': '2025-09-10 02:48:46.710962', 'step': 4838, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:48:46.764220', 'step': 4838, 'epoch': 3} +{'type': 'loss', 'content': 0.0067435139790177345, 'timestamp': '2025-09-10 02:48:46.767234', 'step': 4839, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:48:46.820777', 'step': 4839, 'epoch': 3} +{'type': 'loss', 'content': 0.0026406829711049795, 'timestamp': '2025-09-10 02:48:46.827753', 'step': 4840, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-09-10 02:48:46.888591', 'step': 4840, 'epoch': 3} +{'type': 'loss', 'content': 0.005143395159393549, 'timestamp': '2025-09-10 02:48:46.900672', 'step': 4841, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:48:46.953799', 'step': 4841, 'epoch': 3} +{'type': 'loss', 'content': 0.0041872295551002026, 'timestamp': '2025-09-10 02:48:46.956005', 'step': 4842, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:48:47.010985', 'step': 4842, 'epoch': 3} +{'type': 'loss', 'content': 0.0008241079631261528, 'timestamp': '2025-09-10 02:48:47.020779', 'step': 4843, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:48:47.073983', 'step': 4843, 'epoch': 3} +{'type': 'loss', 'content': 0.004373212810605764, 'timestamp': '2025-09-10 02:48:47.080286', 'step': 4844, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:48:47.134141', 'step': 4844, 'epoch': 3} +{'type': 'loss', 'content': 0.009122295305132866, 'timestamp': '2025-09-10 02:48:47.136447', 'step': 4845, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:48:47.189606', 'step': 4845, 'epoch': 3} +{'type': 'loss', 'content': 0.0046147494576871395, 'timestamp': '2025-09-10 02:48:47.191913', 'step': 4846, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:48:47.245612', 'step': 4846, 'epoch': 3} +{'type': 'loss', 'content': 0.003591356333345175, 'timestamp': '2025-09-10 02:48:47.247665', 'step': 4847, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:48:47.300939', 'step': 4847, 'epoch': 3} +{'type': 'loss', 'content': 0.0074340240098536015, 'timestamp': '2025-09-10 02:48:47.307203', 'step': 4848, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:48:47.360981', 'step': 4848, 'epoch': 3} +{'type': 'loss', 'content': 0.001226637396030128, 'timestamp': '2025-09-10 02:48:47.370276', 'step': 4849, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:48:47.424521', 'step': 4849, 'epoch': 3} +{'type': 'loss', 'content': 0.0021831209305673838, 'timestamp': '2025-09-10 02:48:47.426955', 'step': 4850, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:48:47.481216', 'step': 4850, 'epoch': 3} +{'type': 'loss', 'content': 0.013943219557404518, 'timestamp': '2025-09-10 02:48:47.490835', 'step': 4851, 'epoch': 3} +{'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 736], 'batch_size': 8, 'flops': 14691612894976}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10859018244352}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 624], 'batch_size': 8, 'flops': 12455932682112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11497784019456}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8623338031488}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8623338031488}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10859018244352}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 656], 'batch_size': 8, 'flops': 13094698457216}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 560], 'batch_size': 8, 'flops': 11178401131904}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11497784019456}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 1168], 'batch_size': 8, 'flops': 23314950858880}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 640], 'batch_size': 8, 'flops': 12775315569664}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [6, 208], 'batch_size': 8, 'flops': 4151977605760}], 'timestamp': '2025-09-10 02:49:04.366381', 'step': 4851, 'epoch': 3} +{'type': 'pplx', 'content': 24295117.98900996, 'timestamp': '2025-09-10 02:49:04.369527', 'step': 4851, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:49:04.425386', 'step': 4851, 'epoch': 3} +{'type': 'loss', 'content': 0.0014960489934310317, 'timestamp': '2025-09-10 02:49:04.431915', 'step': 4852, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:49:04.485742', 'step': 4852, 'epoch': 3} +{'type': 'loss', 'content': 0.0007174062775447965, 'timestamp': '2025-09-10 02:49:04.488064', 'step': 4853, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:49:04.541693', 'step': 4853, 'epoch': 3} +{'type': 'loss', 'content': 0.0016206667060032487, 'timestamp': '2025-09-10 02:49:04.544139', 'step': 4854, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:49:04.604845', 'step': 4854, 'epoch': 3} +{'type': 'loss', 'content': 0.01007130742073059, 'timestamp': '2025-09-10 02:49:04.615540', 'step': 4855, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:49:04.669571', 'step': 4855, 'epoch': 3} +{'type': 'loss', 'content': 0.004926626104861498, 'timestamp': '2025-09-10 02:49:04.675892', 'step': 4856, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:49:04.729198', 'step': 4856, 'epoch': 3} +{'type': 'loss', 'content': 0.002526621101424098, 'timestamp': '2025-09-10 02:49:04.731545', 'step': 4857, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:49:04.785083', 'step': 4857, 'epoch': 3} +{'type': 'loss', 'content': 0.00022045696096029133, 'timestamp': '2025-09-10 02:49:04.788045', 'step': 4858, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:49:04.842506', 'step': 4858, 'epoch': 3} +{'type': 'loss', 'content': 0.00036227802047505975, 'timestamp': '2025-09-10 02:49:04.848676', 'step': 4859, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:49:04.903731', 'step': 4859, 'epoch': 3} +{'type': 'loss', 'content': 0.006822288502007723, 'timestamp': '2025-09-10 02:49:04.910075', 'step': 4860, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:49:04.964079', 'step': 4860, 'epoch': 3} +{'type': 'loss', 'content': 0.0033081471920013428, 'timestamp': '2025-09-10 02:49:04.969912', 'step': 4861, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:49:05.023789', 'step': 4861, 'epoch': 3} +{'type': 'loss', 'content': 0.0016923480434343219, 'timestamp': '2025-09-10 02:49:05.026615', 'step': 4862, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:49:05.080912', 'step': 4862, 'epoch': 3} +{'type': 'loss', 'content': 0.0007131525198929012, 'timestamp': '2025-09-10 02:49:05.083524', 'step': 4863, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:49:05.139873', 'step': 4863, 'epoch': 3} +{'type': 'loss', 'content': 0.001632165745832026, 'timestamp': '2025-09-10 02:49:05.146707', 'step': 4864, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-09-10 02:49:05.206915', 'step': 4864, 'epoch': 3} +{'type': 'loss', 'content': 0.004180103540420532, 'timestamp': '2025-09-10 02:49:05.218636', 'step': 4865, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:49:05.275303', 'step': 4865, 'epoch': 3} +{'type': 'loss', 'content': 0.0026041711680591106, 'timestamp': '2025-09-10 02:49:05.279647', 'step': 4866, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:49:05.333279', 'step': 4866, 'epoch': 3} +{'type': 'loss', 'content': 0.0217380840331316, 'timestamp': '2025-09-10 02:49:05.342910', 'step': 4867, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:49:05.400225', 'step': 4867, 'epoch': 3} +{'type': 'loss', 'content': 0.0028752118814736605, 'timestamp': '2025-09-10 02:49:05.411655', 'step': 4868, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:49:05.465552', 'step': 4868, 'epoch': 3} +{'type': 'loss', 'content': 0.00047668040497228503, 'timestamp': '2025-09-10 02:49:05.476105', 'step': 4869, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:49:05.530876', 'step': 4869, 'epoch': 3} +{'type': 'loss', 'content': 0.003315207315608859, 'timestamp': '2025-09-10 02:49:05.536099', 'step': 4870, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:49:05.592808', 'step': 4870, 'epoch': 3} +{'type': 'loss', 'content': 0.0009684692486189306, 'timestamp': '2025-09-10 02:49:05.595043', 'step': 4871, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:49:05.652537', 'step': 4871, 'epoch': 3} +{'type': 'loss', 'content': 0.004432265181094408, 'timestamp': '2025-09-10 02:49:05.662877', 'step': 4872, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:49:05.717028', 'step': 4872, 'epoch': 3} +{'type': 'loss', 'content': 0.006341114640235901, 'timestamp': '2025-09-10 02:49:05.727531', 'step': 4873, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:49:05.781377', 'step': 4873, 'epoch': 3} +{'type': 'loss', 'content': 0.0015499379951506853, 'timestamp': '2025-09-10 02:49:05.783672', 'step': 4874, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:49:05.836538', 'step': 4874, 'epoch': 3} +{'type': 'loss', 'content': 0.006216306239366531, 'timestamp': '2025-09-10 02:49:05.838673', 'step': 4875, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:49:05.891506', 'step': 4875, 'epoch': 3} +{'type': 'loss', 'content': 0.0010528519051149487, 'timestamp': '2025-09-10 02:49:05.897741', 'step': 4876, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:49:05.951359', 'step': 4876, 'epoch': 3} +{'type': 'loss', 'content': 0.003758539678528905, 'timestamp': '2025-09-10 02:49:05.954406', 'step': 4877, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:49:06.008998', 'step': 4877, 'epoch': 3} +{'type': 'loss', 'content': 0.003981443587690592, 'timestamp': '2025-09-10 02:49:06.011388', 'step': 4878, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:49:06.066153', 'step': 4878, 'epoch': 3} +{'type': 'loss', 'content': 0.0017698196461424232, 'timestamp': '2025-09-10 02:49:06.071234', 'step': 4879, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:49:06.131813', 'step': 4879, 'epoch': 3} +{'type': 'loss', 'content': 0.004895442631095648, 'timestamp': '2025-09-10 02:49:06.138809', 'step': 4880, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:49:06.191355', 'step': 4880, 'epoch': 3} +{'type': 'loss', 'content': 0.0026843196246773005, 'timestamp': '2025-09-10 02:49:06.201363', 'step': 4881, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:49:06.268037', 'step': 4881, 'epoch': 3} +{'type': 'loss', 'content': 0.0005960598355159163, 'timestamp': '2025-09-10 02:49:06.278715', 'step': 4882, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:49:06.337112', 'step': 4882, 'epoch': 3} +{'type': 'loss', 'content': 0.0051751406863331795, 'timestamp': '2025-09-10 02:49:06.347527', 'step': 4883, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:49:06.401433', 'step': 4883, 'epoch': 3} +{'type': 'loss', 'content': 0.00045295237214304507, 'timestamp': '2025-09-10 02:49:06.407739', 'step': 4884, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:49:06.460418', 'step': 4884, 'epoch': 3} +{'type': 'loss', 'content': 0.0009586476953700185, 'timestamp': '2025-09-10 02:49:06.462574', 'step': 4885, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:49:06.517293', 'step': 4885, 'epoch': 3} +{'type': 'loss', 'content': 0.002113823313266039, 'timestamp': '2025-09-10 02:49:06.527090', 'step': 4886, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:49:06.590260', 'step': 4886, 'epoch': 3} +{'type': 'loss', 'content': 0.005936753004789352, 'timestamp': '2025-09-10 02:49:06.601021', 'step': 4887, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:49:06.654695', 'step': 4887, 'epoch': 3} +{'type': 'loss', 'content': 0.0013274262892082334, 'timestamp': '2025-09-10 02:49:06.660630', 'step': 4888, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:49:06.718881', 'step': 4888, 'epoch': 3} +{'type': 'loss', 'content': 0.005245764274150133, 'timestamp': '2025-09-10 02:49:06.720957', 'step': 4889, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:49:06.775784', 'step': 4889, 'epoch': 3} +{'type': 'loss', 'content': 0.001503094914369285, 'timestamp': '2025-09-10 02:49:06.785528', 'step': 4890, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:49:06.838239', 'step': 4890, 'epoch': 3} +{'type': 'loss', 'content': 0.000990046188235283, 'timestamp': '2025-09-10 02:49:06.840670', 'step': 4891, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 608], 'flops': 12160073886080.0}, 'timestamp': '2025-09-10 02:49:06.931321', 'step': 4891, 'epoch': 3} +{'type': 'loss', 'content': 0.0016281692078337073, 'timestamp': '2025-09-10 02:49:06.949262', 'step': 4892, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:49:07.004095', 'step': 4892, 'epoch': 3} +{'type': 'loss', 'content': 0.00026771408738568425, 'timestamp': '2025-09-10 02:49:07.014624', 'step': 4893, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:49:07.068436', 'step': 4893, 'epoch': 3} +{'type': 'loss', 'content': 0.0003569223335944116, 'timestamp': '2025-09-10 02:49:07.070747', 'step': 4894, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:49:07.127781', 'step': 4894, 'epoch': 3} +{'type': 'loss', 'content': 0.002908846829086542, 'timestamp': '2025-09-10 02:49:07.130092', 'step': 4895, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:49:07.190617', 'step': 4895, 'epoch': 3} +{'type': 'loss', 'content': 0.000855074729770422, 'timestamp': '2025-09-10 02:49:07.202090', 'step': 4896, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-09-10 02:49:07.262260', 'step': 4896, 'epoch': 3} +{'type': 'loss', 'content': 0.017130032181739807, 'timestamp': '2025-09-10 02:49:07.274307', 'step': 4897, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:49:07.327874', 'step': 4897, 'epoch': 3} +{'type': 'loss', 'content': 0.000767638732213527, 'timestamp': '2025-09-10 02:49:07.330218', 'step': 4898, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:49:07.383228', 'step': 4898, 'epoch': 3} +{'type': 'loss', 'content': 0.016307687386870384, 'timestamp': '2025-09-10 02:49:07.385377', 'step': 4899, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:49:07.438024', 'step': 4899, 'epoch': 3} +{'type': 'loss', 'content': 0.0028657265938818455, 'timestamp': '2025-09-10 02:49:07.443790', 'step': 4900, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:49:07.496198', 'step': 4900, 'epoch': 3} +{'type': 'loss', 'content': 7.104109681677073e-05, 'timestamp': '2025-09-10 02:49:07.498572', 'step': 4901, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:49:07.551374', 'step': 4901, 'epoch': 3} +{'type': 'loss', 'content': 0.004295418504625559, 'timestamp': '2025-09-10 02:49:07.558076', 'step': 4902, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:49:07.611613', 'step': 4902, 'epoch': 3} +{'type': 'loss', 'content': 0.006380677223205566, 'timestamp': '2025-09-10 02:49:07.613998', 'step': 4903, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:49:07.667679', 'step': 4903, 'epoch': 3} +{'type': 'loss', 'content': 0.00020270653476472944, 'timestamp': '2025-09-10 02:49:07.673467', 'step': 4904, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:49:07.725601', 'step': 4904, 'epoch': 3} +{'type': 'loss', 'content': 0.0031658937223255634, 'timestamp': '2025-09-10 02:49:07.729020', 'step': 4905, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-09-10 02:49:07.791154', 'step': 4905, 'epoch': 3} +{'type': 'loss', 'content': 0.007741497363895178, 'timestamp': '2025-09-10 02:49:07.802090', 'step': 4906, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:49:07.854893', 'step': 4906, 'epoch': 3} +{'type': 'loss', 'content': 0.006203799042850733, 'timestamp': '2025-09-10 02:49:07.857862', 'step': 4907, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:49:07.910992', 'step': 4907, 'epoch': 3} +{'type': 'loss', 'content': 0.0005209214286878705, 'timestamp': '2025-09-10 02:49:07.916932', 'step': 4908, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:49:07.969235', 'step': 4908, 'epoch': 3} +{'type': 'loss', 'content': 0.0008703137864358723, 'timestamp': '2025-09-10 02:49:07.971407', 'step': 4909, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:49:08.023941', 'step': 4909, 'epoch': 3} +{'type': 'loss', 'content': 0.03252455219626427, 'timestamp': '2025-09-10 02:49:08.027113', 'step': 4910, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:49:08.079919', 'step': 4910, 'epoch': 3} +{'type': 'loss', 'content': 0.0003201315994374454, 'timestamp': '2025-09-10 02:49:08.086527', 'step': 4911, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:49:08.139698', 'step': 4911, 'epoch': 3} +{'type': 'loss', 'content': 0.0007507981499657035, 'timestamp': '2025-09-10 02:49:08.145643', 'step': 4912, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:49:08.197933', 'step': 4912, 'epoch': 3} +{'type': 'loss', 'content': 0.024523703381419182, 'timestamp': '2025-09-10 02:49:08.200011', 'step': 4913, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:49:08.253142', 'step': 4913, 'epoch': 3} +{'type': 'loss', 'content': 6.382607534760609e-05, 'timestamp': '2025-09-10 02:49:08.261405', 'step': 4914, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:49:08.315062', 'step': 4914, 'epoch': 3} +{'type': 'loss', 'content': 0.0006074230768717825, 'timestamp': '2025-09-10 02:49:08.317886', 'step': 4915, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:49:08.371470', 'step': 4915, 'epoch': 3} +{'type': 'loss', 'content': 0.00013656097871717066, 'timestamp': '2025-09-10 02:49:08.377207', 'step': 4916, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:49:08.431408', 'step': 4916, 'epoch': 3} +{'type': 'loss', 'content': 9.140331530943513e-05, 'timestamp': '2025-09-10 02:49:08.433606', 'step': 4917, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:49:08.486494', 'step': 4917, 'epoch': 3} +{'type': 'loss', 'content': 0.008503682911396027, 'timestamp': '2025-09-10 02:49:08.489682', 'step': 4918, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:49:08.542174', 'step': 4918, 'epoch': 3} +{'type': 'loss', 'content': 1.4097786333877593e-05, 'timestamp': '2025-09-10 02:49:08.545182', 'step': 4919, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:49:08.598621', 'step': 4919, 'epoch': 3} +{'type': 'loss', 'content': 0.0008876653737388551, 'timestamp': '2025-09-10 02:49:08.604363', 'step': 4920, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:49:08.656771', 'step': 4920, 'epoch': 3} +{'type': 'loss', 'content': 0.001713093719445169, 'timestamp': '2025-09-10 02:49:08.658968', 'step': 4921, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:49:08.712900', 'step': 4921, 'epoch': 3} +{'type': 'loss', 'content': 0.00036111727240495384, 'timestamp': '2025-09-10 02:49:08.722520', 'step': 4922, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:49:08.776201', 'step': 4922, 'epoch': 3} +{'type': 'loss', 'content': 0.00629039341583848, 'timestamp': '2025-09-10 02:49:08.779114', 'step': 4923, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:49:08.832124', 'step': 4923, 'epoch': 3} +{'type': 'loss', 'content': 0.0018623418873175979, 'timestamp': '2025-09-10 02:49:08.837823', 'step': 4924, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:49:08.890049', 'step': 4924, 'epoch': 3} +{'type': 'loss', 'content': 0.0023605269379913807, 'timestamp': '2025-09-10 02:49:08.892909', 'step': 4925, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:49:08.945712', 'step': 4925, 'epoch': 3} +{'type': 'loss', 'content': 5.923226126469672e-05, 'timestamp': '2025-09-10 02:49:08.948289', 'step': 4926, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:49:09.001102', 'step': 4926, 'epoch': 3} +{'type': 'loss', 'content': 0.0005650994135066867, 'timestamp': '2025-09-10 02:49:09.003279', 'step': 4927, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:49:09.056645', 'step': 4927, 'epoch': 3} +{'type': 'loss', 'content': 0.0001354710548184812, 'timestamp': '2025-09-10 02:49:09.062499', 'step': 4928, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:49:09.115163', 'step': 4928, 'epoch': 3} +{'type': 'loss', 'content': 0.007866756059229374, 'timestamp': '2025-09-10 02:49:09.117259', 'step': 4929, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:49:09.169987', 'step': 4929, 'epoch': 3} +{'type': 'loss', 'content': 0.00015856897516641766, 'timestamp': '2025-09-10 02:49:09.172139', 'step': 4930, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:49:09.224756', 'step': 4930, 'epoch': 3} +{'type': 'loss', 'content': 0.0006528622470796108, 'timestamp': '2025-09-10 02:49:09.226961', 'step': 4931, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:49:09.284890', 'step': 4931, 'epoch': 3} +{'type': 'loss', 'content': 0.00742186838760972, 'timestamp': '2025-09-10 02:49:09.296090', 'step': 4932, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:49:09.348259', 'step': 4932, 'epoch': 3} +{'type': 'loss', 'content': 0.001926262048073113, 'timestamp': '2025-09-10 02:49:09.350470', 'step': 4933, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:49:09.405137', 'step': 4933, 'epoch': 3} +{'type': 'loss', 'content': 0.009631103835999966, 'timestamp': '2025-09-10 02:49:09.414925', 'step': 4934, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:49:09.475685', 'step': 4934, 'epoch': 3} +{'type': 'loss', 'content': 0.0002042563573922962, 'timestamp': '2025-09-10 02:49:09.486393', 'step': 4935, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:49:09.539442', 'step': 4935, 'epoch': 3} +{'type': 'loss', 'content': 0.0008414814947172999, 'timestamp': '2025-09-10 02:49:09.545439', 'step': 4936, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:49:09.598042', 'step': 4936, 'epoch': 3} +{'type': 'loss', 'content': 0.00015261145017575473, 'timestamp': '2025-09-10 02:49:09.600107', 'step': 4937, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:49:09.652779', 'step': 4937, 'epoch': 3} +{'type': 'loss', 'content': 0.004089429508894682, 'timestamp': '2025-09-10 02:49:09.654891', 'step': 4938, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 528], 'flops': 10560064173120.0}, 'timestamp': '2025-09-10 02:49:09.735060', 'step': 4938, 'epoch': 3} +{'type': 'loss', 'content': 0.00035414841840974987, 'timestamp': '2025-09-10 02:49:09.750009', 'step': 4939, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:49:09.803981', 'step': 4939, 'epoch': 3} +{'type': 'loss', 'content': 0.018595749512314796, 'timestamp': '2025-09-10 02:49:09.812776', 'step': 4940, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:49:09.865404', 'step': 4940, 'epoch': 3} +{'type': 'loss', 'content': 8.842367969918996e-05, 'timestamp': '2025-09-10 02:49:09.873540', 'step': 4941, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:49:09.927169', 'step': 4941, 'epoch': 3} +{'type': 'loss', 'content': 0.004058366175740957, 'timestamp': '2025-09-10 02:49:09.929310', 'step': 4942, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:49:09.987517', 'step': 4942, 'epoch': 3} +{'type': 'loss', 'content': 0.00021893209486734122, 'timestamp': '2025-09-10 02:49:09.997927', 'step': 4943, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:49:10.052642', 'step': 4943, 'epoch': 3} +{'type': 'loss', 'content': 0.0018335158238187432, 'timestamp': '2025-09-10 02:49:10.063188', 'step': 4944, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-09-10 02:49:10.128422', 'step': 4944, 'epoch': 3} +{'type': 'loss', 'content': 0.0025715469382703304, 'timestamp': '2025-09-10 02:49:10.141635', 'step': 4945, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:49:10.195038', 'step': 4945, 'epoch': 3} +{'type': 'loss', 'content': 0.0006101075559854507, 'timestamp': '2025-09-10 02:49:10.201469', 'step': 4946, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:49:10.255393', 'step': 4946, 'epoch': 3} +{'type': 'loss', 'content': 0.011807610280811787, 'timestamp': '2025-09-10 02:49:10.265031', 'step': 4947, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:49:10.318303', 'step': 4947, 'epoch': 3} +{'type': 'loss', 'content': 5.031693945056759e-05, 'timestamp': '2025-09-10 02:49:10.328706', 'step': 4948, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:49:10.380955', 'step': 4948, 'epoch': 3} +{'type': 'loss', 'content': 0.015000006183981895, 'timestamp': '2025-09-10 02:49:10.383200', 'step': 4949, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:49:10.436444', 'step': 4949, 'epoch': 3} +{'type': 'loss', 'content': 0.000620223639998585, 'timestamp': '2025-09-10 02:49:10.438945', 'step': 4950, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:49:10.492174', 'step': 4950, 'epoch': 3} +{'type': 'loss', 'content': 0.006793139036744833, 'timestamp': '2025-09-10 02:49:10.495165', 'step': 4951, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:49:10.547935', 'step': 4951, 'epoch': 3} +{'type': 'loss', 'content': 8.225173951359466e-05, 'timestamp': '2025-09-10 02:49:10.553726', 'step': 4952, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:49:10.606381', 'step': 4952, 'epoch': 3} +{'type': 'loss', 'content': 2.438864430587273e-05, 'timestamp': '2025-09-10 02:49:10.608829', 'step': 4953, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-09-10 02:49:10.674891', 'step': 4953, 'epoch': 3} +{'type': 'loss', 'content': 0.03761579841375351, 'timestamp': '2025-09-10 02:49:10.687081', 'step': 4954, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:49:10.740259', 'step': 4954, 'epoch': 3} +{'type': 'loss', 'content': 7.660187839064747e-05, 'timestamp': '2025-09-10 02:49:10.743197', 'step': 4955, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:49:10.796284', 'step': 4955, 'epoch': 3} +{'type': 'loss', 'content': 0.002929875859990716, 'timestamp': '2025-09-10 02:49:10.801990', 'step': 4956, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:49:10.854372', 'step': 4956, 'epoch': 3} +{'type': 'loss', 'content': 0.00031684644636698067, 'timestamp': '2025-09-10 02:49:10.856570', 'step': 4957, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:49:10.909171', 'step': 4957, 'epoch': 3} +{'type': 'loss', 'content': 0.001288395025767386, 'timestamp': '2025-09-10 02:49:10.911406', 'step': 4958, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:49:10.965087', 'step': 4958, 'epoch': 3} +{'type': 'loss', 'content': 0.0015277406200766563, 'timestamp': '2025-09-10 02:49:10.967183', 'step': 4959, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:49:11.019919', 'step': 4959, 'epoch': 3} +{'type': 'loss', 'content': 0.00038321936153806746, 'timestamp': '2025-09-10 02:49:11.025599', 'step': 4960, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:49:11.077688', 'step': 4960, 'epoch': 3} +{'type': 'loss', 'content': 0.00033514885581098497, 'timestamp': '2025-09-10 02:49:11.080727', 'step': 4961, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-09-10 02:49:11.149013', 'step': 4961, 'epoch': 3} +{'type': 'loss', 'content': 0.0003143365611322224, 'timestamp': '2025-09-10 02:49:11.161617', 'step': 4962, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:49:11.215773', 'step': 4962, 'epoch': 3} +{'type': 'loss', 'content': 0.0028298699762672186, 'timestamp': '2025-09-10 02:49:11.217991', 'step': 4963, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:49:11.276032', 'step': 4963, 'epoch': 3} +{'type': 'loss', 'content': 0.00010795405978569761, 'timestamp': '2025-09-10 02:49:11.287256', 'step': 4964, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:49:11.339690', 'step': 4964, 'epoch': 3} +{'type': 'loss', 'content': 0.00016476513701491058, 'timestamp': '2025-09-10 02:49:11.342009', 'step': 4965, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:49:11.396455', 'step': 4965, 'epoch': 3} +{'type': 'loss', 'content': 3.192130316165276e-05, 'timestamp': '2025-09-10 02:49:11.406303', 'step': 4966, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:49:11.459362', 'step': 4966, 'epoch': 3} +{'type': 'loss', 'content': 1.839994547481183e-05, 'timestamp': '2025-09-10 02:49:11.461591', 'step': 4967, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:49:11.514981', 'step': 4967, 'epoch': 3} +{'type': 'loss', 'content': 0.0019118456402793527, 'timestamp': '2025-09-10 02:49:11.520730', 'step': 4968, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:49:11.573564', 'step': 4968, 'epoch': 3} +{'type': 'loss', 'content': 0.003673020051792264, 'timestamp': '2025-09-10 02:49:11.575604', 'step': 4969, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:49:11.628497', 'step': 4969, 'epoch': 3} +{'type': 'loss', 'content': 0.0025563675444573164, 'timestamp': '2025-09-10 02:49:11.631623', 'step': 4970, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:49:11.684595', 'step': 4970, 'epoch': 3} +{'type': 'loss', 'content': 0.006253737956285477, 'timestamp': '2025-09-10 02:49:11.687528', 'step': 4971, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:49:11.741234', 'step': 4971, 'epoch': 3} +{'type': 'loss', 'content': 9.789296018425375e-05, 'timestamp': '2025-09-10 02:49:11.747119', 'step': 4972, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:49:11.799301', 'step': 4972, 'epoch': 3} +{'type': 'loss', 'content': 5.892937770113349e-05, 'timestamp': '2025-09-10 02:49:11.801475', 'step': 4973, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:49:11.854459', 'step': 4973, 'epoch': 3} +{'type': 'loss', 'content': 0.0008523422293365002, 'timestamp': '2025-09-10 02:49:11.857621', 'step': 4974, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:49:11.915952', 'step': 4974, 'epoch': 3} +{'type': 'loss', 'content': 0.002624673070386052, 'timestamp': '2025-09-10 02:49:11.926340', 'step': 4975, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:49:11.979223', 'step': 4975, 'epoch': 3} +{'type': 'loss', 'content': 0.09043518453836441, 'timestamp': '2025-09-10 02:49:11.985055', 'step': 4976, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:49:12.037002', 'step': 4976, 'epoch': 3} +{'type': 'loss', 'content': 0.042494259774684906, 'timestamp': '2025-09-10 02:49:12.039072', 'step': 4977, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:49:12.091943', 'step': 4977, 'epoch': 3} +{'type': 'loss', 'content': 0.0104117002338171, 'timestamp': '2025-09-10 02:49:12.100053', 'step': 4978, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:49:12.153082', 'step': 4978, 'epoch': 3} +{'type': 'loss', 'content': 0.0007600211538374424, 'timestamp': '2025-09-10 02:49:12.156033', 'step': 4979, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:49:12.213830', 'step': 4979, 'epoch': 3} +{'type': 'loss', 'content': 0.0011665706988424063, 'timestamp': '2025-09-10 02:49:12.225051', 'step': 4980, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:49:12.277620', 'step': 4980, 'epoch': 3} +{'type': 'loss', 'content': 0.0048175351694226265, 'timestamp': '2025-09-10 02:49:12.280658', 'step': 4981, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:49:12.332840', 'step': 4981, 'epoch': 3} +{'type': 'loss', 'content': 0.004247081466019154, 'timestamp': '2025-09-10 02:49:12.335062', 'step': 4982, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:49:12.387638', 'step': 4982, 'epoch': 3} +{'type': 'loss', 'content': 0.05138136073946953, 'timestamp': '2025-09-10 02:49:12.389849', 'step': 4983, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:49:12.443886', 'step': 4983, 'epoch': 3} +{'type': 'loss', 'content': 7.509895658586174e-05, 'timestamp': '2025-09-10 02:49:12.454422', 'step': 4984, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:49:12.506171', 'step': 4984, 'epoch': 3} +{'type': 'loss', 'content': 0.0012163658393546939, 'timestamp': '2025-09-10 02:49:12.508557', 'step': 4985, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:49:12.560959', 'step': 4985, 'epoch': 3} +{'type': 'loss', 'content': 0.0004981536767445505, 'timestamp': '2025-09-10 02:49:12.564110', 'step': 4986, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:49:12.616640', 'step': 4986, 'epoch': 3} +{'type': 'loss', 'content': 0.00047296658158302307, 'timestamp': '2025-09-10 02:49:12.618848', 'step': 4987, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:49:12.671397', 'step': 4987, 'epoch': 3} +{'type': 'loss', 'content': 0.0001558628136990592, 'timestamp': '2025-09-10 02:49:12.677081', 'step': 4988, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:49:12.729095', 'step': 4988, 'epoch': 3} +{'type': 'loss', 'content': 2.702091478568036e-05, 'timestamp': '2025-09-10 02:49:12.731205', 'step': 4989, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:49:12.784264', 'step': 4989, 'epoch': 3} +{'type': 'loss', 'content': 0.0011864184634760022, 'timestamp': '2025-09-10 02:49:12.786384', 'step': 4990, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:49:12.846556', 'step': 4990, 'epoch': 3} +{'type': 'loss', 'content': 0.002848926931619644, 'timestamp': '2025-09-10 02:49:12.857284', 'step': 4991, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:49:12.910135', 'step': 4991, 'epoch': 3} +{'type': 'loss', 'content': 4.387175795272924e-05, 'timestamp': '2025-09-10 02:49:12.916067', 'step': 4992, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:49:12.968200', 'step': 4992, 'epoch': 3} +{'type': 'loss', 'content': 0.00013732888328377157, 'timestamp': '2025-09-10 02:49:12.970618', 'step': 4993, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:49:13.023629', 'step': 4993, 'epoch': 3} +{'type': 'loss', 'content': 0.0011589375790208578, 'timestamp': '2025-09-10 02:49:13.025843', 'step': 4994, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:49:13.079100', 'step': 4994, 'epoch': 3} +{'type': 'loss', 'content': 7.879437180235982e-05, 'timestamp': '2025-09-10 02:49:13.088756', 'step': 4995, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:49:13.141190', 'step': 4995, 'epoch': 3} +{'type': 'loss', 'content': 0.0007933162851259112, 'timestamp': '2025-09-10 02:49:13.146947', 'step': 4996, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:49:13.199162', 'step': 4996, 'epoch': 3} +{'type': 'loss', 'content': 0.00863623432815075, 'timestamp': '2025-09-10 02:49:13.201285', 'step': 4997, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:49:13.253933', 'step': 4997, 'epoch': 3} +{'type': 'loss', 'content': 0.005718530621379614, 'timestamp': '2025-09-10 02:49:13.256121', 'step': 4998, 'epoch': 3} +{'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 736], 'batch_size': 8, 'flops': 14691612894976}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10859018244352}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 624], 'batch_size': 8, 'flops': 12455932682112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11497784019456}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8623338031488}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8623338031488}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10859018244352}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 656], 'batch_size': 8, 'flops': 13094698457216}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 560], 'batch_size': 8, 'flops': 11178401131904}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11497784019456}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 1168], 'batch_size': 8, 'flops': 23314950858880}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 640], 'batch_size': 8, 'flops': 12775315569664}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [6, 208], 'batch_size': 8, 'flops': 4151977605760}], 'timestamp': '2025-09-10 02:49:30.200505', 'step': 4998, 'epoch': 3} +{'type': 'pplx', 'content': 23973688.272886705, 'timestamp': '2025-09-10 02:49:30.204895', 'step': 4998, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:49:30.265785', 'step': 4998, 'epoch': 3} +{'type': 'loss', 'content': 0.005250006448477507, 'timestamp': '2025-09-10 02:49:30.269041', 'step': 4999, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:49:30.323789', 'step': 4999, 'epoch': 3} +{'type': 'loss', 'content': 9.246975969290361e-05, 'timestamp': '2025-09-10 02:49:30.329845', 'step': 5000, 'epoch': 3} +{'type': 'info', 'content': 'Checkpoint saved at step 5000', 'timestamp': '2025-09-10 02:49:30.824877', 'step': 5000, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:49:30.884237', 'step': 5000, 'epoch': 3} +{'type': 'loss', 'content': 0.0025869489181786776, 'timestamp': '2025-09-10 02:49:30.887783', 'step': 5001, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:49:30.942689', 'step': 5001, 'epoch': 3} +{'type': 'loss', 'content': 0.00045246342779137194, 'timestamp': '2025-09-10 02:49:30.944776', 'step': 5002, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:49:30.999017', 'step': 5002, 'epoch': 3} +{'type': 'loss', 'content': 0.00013516661419998854, 'timestamp': '2025-09-10 02:49:31.004280', 'step': 5003, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:49:31.058092', 'step': 5003, 'epoch': 3} +{'type': 'loss', 'content': 0.0020484435372054577, 'timestamp': '2025-09-10 02:49:31.064349', 'step': 5004, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:49:31.117185', 'step': 5004, 'epoch': 3} +{'type': 'loss', 'content': 7.250265480251983e-05, 'timestamp': '2025-09-10 02:49:31.127085', 'step': 5005, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:49:31.181185', 'step': 5005, 'epoch': 3} +{'type': 'loss', 'content': 0.00010583539551589638, 'timestamp': '2025-09-10 02:49:31.189358', 'step': 5006, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:49:31.243400', 'step': 5006, 'epoch': 3} +{'type': 'loss', 'content': 0.0494500957429409, 'timestamp': '2025-09-10 02:49:31.249151', 'step': 5007, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:49:31.302434', 'step': 5007, 'epoch': 3} +{'type': 'loss', 'content': 0.0012285331031307578, 'timestamp': '2025-09-10 02:49:31.308707', 'step': 5008, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:49:31.367568', 'step': 5008, 'epoch': 3} +{'type': 'loss', 'content': 0.00039893705979920924, 'timestamp': '2025-09-10 02:49:31.379183', 'step': 5009, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-09-10 02:49:31.441173', 'step': 5009, 'epoch': 3} +{'type': 'loss', 'content': 0.000498550885822624, 'timestamp': '2025-09-10 02:49:31.452294', 'step': 5010, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:49:31.505685', 'step': 5010, 'epoch': 3} +{'type': 'loss', 'content': 0.0007543464889749885, 'timestamp': '2025-09-10 02:49:31.507883', 'step': 5011, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:49:31.561157', 'step': 5011, 'epoch': 3} +{'type': 'loss', 'content': 0.0007778530125506222, 'timestamp': '2025-09-10 02:49:31.567254', 'step': 5012, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-09-10 02:49:31.631975', 'step': 5012, 'epoch': 3} +{'type': 'loss', 'content': 0.0005997510743327439, 'timestamp': '2025-09-10 02:49:31.645046', 'step': 5013, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:49:31.699326', 'step': 5013, 'epoch': 3} +{'type': 'loss', 'content': 0.0007705023162998259, 'timestamp': '2025-09-10 02:49:31.701563', 'step': 5014, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:49:31.755132', 'step': 5014, 'epoch': 3} +{'type': 'loss', 'content': 0.002069078851491213, 'timestamp': '2025-09-10 02:49:31.757665', 'step': 5015, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:49:31.811469', 'step': 5015, 'epoch': 3} +{'type': 'loss', 'content': 0.06012476235628128, 'timestamp': '2025-09-10 02:49:31.818436', 'step': 5016, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:49:31.871662', 'step': 5016, 'epoch': 3} +{'type': 'loss', 'content': 0.0005363817908801138, 'timestamp': '2025-09-10 02:49:31.878083', 'step': 5017, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:49:31.931714', 'step': 5017, 'epoch': 3} +{'type': 'loss', 'content': 0.0006101771141402423, 'timestamp': '2025-09-10 02:49:31.935321', 'step': 5018, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:49:31.991494', 'step': 5018, 'epoch': 3} +{'type': 'loss', 'content': 0.0008906811126507819, 'timestamp': '2025-09-10 02:49:31.998090', 'step': 5019, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:49:32.051264', 'step': 5019, 'epoch': 3} +{'type': 'loss', 'content': 0.00037788457120768726, 'timestamp': '2025-09-10 02:49:32.060042', 'step': 5020, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:49:32.112968', 'step': 5020, 'epoch': 3} +{'type': 'loss', 'content': 0.01288488321006298, 'timestamp': '2025-09-10 02:49:32.123456', 'step': 5021, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:49:32.177243', 'step': 5021, 'epoch': 3} +{'type': 'loss', 'content': 0.00032534441561438143, 'timestamp': '2025-09-10 02:49:32.180193', 'step': 5022, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:49:32.233414', 'step': 5022, 'epoch': 3} +{'type': 'loss', 'content': 0.0004104897961951792, 'timestamp': '2025-09-10 02:49:32.235881', 'step': 5023, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:49:32.289703', 'step': 5023, 'epoch': 3} +{'type': 'loss', 'content': 0.0006058880826458335, 'timestamp': '2025-09-10 02:49:32.295749', 'step': 5024, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:49:32.348316', 'step': 5024, 'epoch': 3} +{'type': 'loss', 'content': 0.007953226566314697, 'timestamp': '2025-09-10 02:49:32.350767', 'step': 5025, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-09-10 02:49:32.412534', 'step': 5025, 'epoch': 3} +{'type': 'loss', 'content': 0.0005587108316831291, 'timestamp': '2025-09-10 02:49:32.423616', 'step': 5026, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:49:32.483797', 'step': 5026, 'epoch': 3} +{'type': 'loss', 'content': 0.0013259410625323653, 'timestamp': '2025-09-10 02:49:32.494451', 'step': 5027, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:49:32.549829', 'step': 5027, 'epoch': 3} +{'type': 'loss', 'content': 0.001348756835795939, 'timestamp': '2025-09-10 02:49:32.559172', 'step': 5028, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:49:32.611941', 'step': 5028, 'epoch': 3} +{'type': 'loss', 'content': 0.001706228475086391, 'timestamp': '2025-09-10 02:49:32.614138', 'step': 5029, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:49:32.667144', 'step': 5029, 'epoch': 3} +{'type': 'loss', 'content': 0.002383357612416148, 'timestamp': '2025-09-10 02:49:32.669530', 'step': 5030, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:49:32.723098', 'step': 5030, 'epoch': 3} +{'type': 'loss', 'content': 0.00029474933398887515, 'timestamp': '2025-09-10 02:49:32.725403', 'step': 5031, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:49:32.778779', 'step': 5031, 'epoch': 3} +{'type': 'loss', 'content': 0.00028504797955974936, 'timestamp': '2025-09-10 02:49:32.787605', 'step': 5032, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:49:32.840467', 'step': 5032, 'epoch': 3} +{'type': 'loss', 'content': 0.0013638290110975504, 'timestamp': '2025-09-10 02:49:32.842985', 'step': 5033, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:49:32.895778', 'step': 5033, 'epoch': 3} +{'type': 'loss', 'content': 8.578953566029668e-05, 'timestamp': '2025-09-10 02:49:32.902202', 'step': 5034, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-09-10 02:49:32.968769', 'step': 5034, 'epoch': 3} +{'type': 'loss', 'content': 0.0013735241955146194, 'timestamp': '2025-09-10 02:49:32.981023', 'step': 5035, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:49:33.036692', 'step': 5035, 'epoch': 3} +{'type': 'loss', 'content': 0.004391853231936693, 'timestamp': '2025-09-10 02:49:33.042972', 'step': 5036, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:49:33.097545', 'step': 5036, 'epoch': 3} +{'type': 'loss', 'content': 0.014596188440918922, 'timestamp': '2025-09-10 02:49:33.108042', 'step': 5037, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:49:33.161292', 'step': 5037, 'epoch': 3} +{'type': 'loss', 'content': 0.002435219706967473, 'timestamp': '2025-09-10 02:49:33.163407', 'step': 5038, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:49:33.215990', 'step': 5038, 'epoch': 3} +{'type': 'loss', 'content': 0.006800977047532797, 'timestamp': '2025-09-10 02:49:33.218873', 'step': 5039, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:49:33.271378', 'step': 5039, 'epoch': 3} +{'type': 'loss', 'content': 0.0022466571535915136, 'timestamp': '2025-09-10 02:49:33.277349', 'step': 5040, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:49:33.330168', 'step': 5040, 'epoch': 3} +{'type': 'loss', 'content': 0.039408233016729355, 'timestamp': '2025-09-10 02:49:33.332275', 'step': 5041, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:49:33.385025', 'step': 5041, 'epoch': 3} +{'type': 'loss', 'content': 9.263594256481156e-05, 'timestamp': '2025-09-10 02:49:33.391490', 'step': 5042, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:49:33.444465', 'step': 5042, 'epoch': 3} +{'type': 'loss', 'content': 0.0003711421159096062, 'timestamp': '2025-09-10 02:49:33.446645', 'step': 5043, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:49:33.499888', 'step': 5043, 'epoch': 3} +{'type': 'loss', 'content': 0.0020662827882915735, 'timestamp': '2025-09-10 02:49:33.505891', 'step': 5044, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:49:33.558962', 'step': 5044, 'epoch': 3} +{'type': 'loss', 'content': 0.0005662030889652669, 'timestamp': '2025-09-10 02:49:33.561175', 'step': 5045, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:49:33.614915', 'step': 5045, 'epoch': 3} +{'type': 'loss', 'content': 0.0003250579466111958, 'timestamp': '2025-09-10 02:49:33.620750', 'step': 5046, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:49:33.675612', 'step': 5046, 'epoch': 3} +{'type': 'loss', 'content': 0.03217030689120293, 'timestamp': '2025-09-10 02:49:33.678133', 'step': 5047, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:49:33.731912', 'step': 5047, 'epoch': 3} +{'type': 'loss', 'content': 0.0071533904410898685, 'timestamp': '2025-09-10 02:49:33.738423', 'step': 5048, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:49:33.791850', 'step': 5048, 'epoch': 3} +{'type': 'loss', 'content': 0.002689636079594493, 'timestamp': '2025-09-10 02:49:33.794323', 'step': 5049, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:49:33.847226', 'step': 5049, 'epoch': 3} +{'type': 'loss', 'content': 0.00031585918623022735, 'timestamp': '2025-09-10 02:49:33.849385', 'step': 5050, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:49:33.902021', 'step': 5050, 'epoch': 3} +{'type': 'loss', 'content': 0.002843406517058611, 'timestamp': '2025-09-10 02:49:33.904224', 'step': 5051, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:49:33.957678', 'step': 5051, 'epoch': 3} +{'type': 'loss', 'content': 0.0015163521748036146, 'timestamp': '2025-09-10 02:49:33.963737', 'step': 5052, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:49:34.016455', 'step': 5052, 'epoch': 3} +{'type': 'loss', 'content': 0.0024538072757422924, 'timestamp': '2025-09-10 02:49:34.019027', 'step': 5053, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:49:34.072146', 'step': 5053, 'epoch': 3} +{'type': 'loss', 'content': 0.0005008867592550814, 'timestamp': '2025-09-10 02:49:34.074483', 'step': 5054, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:49:34.127898', 'step': 5054, 'epoch': 3} +{'type': 'loss', 'content': 0.0010241649579256773, 'timestamp': '2025-09-10 02:49:34.134387', 'step': 5055, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:49:34.187612', 'step': 5055, 'epoch': 3} +{'type': 'loss', 'content': 0.00022090300626587123, 'timestamp': '2025-09-10 02:49:34.193868', 'step': 5056, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:49:34.245943', 'step': 5056, 'epoch': 3} +{'type': 'loss', 'content': 0.01904134452342987, 'timestamp': '2025-09-10 02:49:34.248400', 'step': 5057, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:49:34.301387', 'step': 5057, 'epoch': 3} +{'type': 'loss', 'content': 0.0023352052085101604, 'timestamp': '2025-09-10 02:49:34.303775', 'step': 5058, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:49:34.357242', 'step': 5058, 'epoch': 3} +{'type': 'loss', 'content': 0.00011699604510795325, 'timestamp': '2025-09-10 02:49:34.366789', 'step': 5059, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:49:34.420427', 'step': 5059, 'epoch': 3} +{'type': 'loss', 'content': 0.013018635101616383, 'timestamp': '2025-09-10 02:49:34.427135', 'step': 5060, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:49:34.481243', 'step': 5060, 'epoch': 3} +{'type': 'loss', 'content': 0.0003083543269895017, 'timestamp': '2025-09-10 02:49:34.488384', 'step': 5061, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:49:34.543261', 'step': 5061, 'epoch': 3} +{'type': 'loss', 'content': 0.0013074075104668736, 'timestamp': '2025-09-10 02:49:34.545932', 'step': 5062, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:49:34.599589', 'step': 5062, 'epoch': 3} +{'type': 'loss', 'content': 0.016092877835035324, 'timestamp': '2025-09-10 02:49:34.601835', 'step': 5063, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:49:34.654412', 'step': 5063, 'epoch': 3} +{'type': 'loss', 'content': 0.013670714572072029, 'timestamp': '2025-09-10 02:49:34.660333', 'step': 5064, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:49:34.712684', 'step': 5064, 'epoch': 3} +{'type': 'loss', 'content': 0.00033214528230018914, 'timestamp': '2025-09-10 02:49:34.720892', 'step': 5065, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:49:34.782473', 'step': 5065, 'epoch': 3} +{'type': 'loss', 'content': 0.0003066713979933411, 'timestamp': '2025-09-10 02:49:34.784770', 'step': 5066, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:49:34.841144', 'step': 5066, 'epoch': 3} +{'type': 'loss', 'content': 0.0003853857342619449, 'timestamp': '2025-09-10 02:49:34.845734', 'step': 5067, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:49:34.899783', 'step': 5067, 'epoch': 3} +{'type': 'loss', 'content': 0.026348425075411797, 'timestamp': '2025-09-10 02:49:34.906111', 'step': 5068, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:49:34.968630', 'step': 5068, 'epoch': 3} +{'type': 'loss', 'content': 0.0009287443826906383, 'timestamp': '2025-09-10 02:49:34.978910', 'step': 5069, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:49:35.049323', 'step': 5069, 'epoch': 3} +{'type': 'loss', 'content': 0.000837551022414118, 'timestamp': '2025-09-10 02:49:35.059708', 'step': 5070, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:49:35.118026', 'step': 5070, 'epoch': 3} +{'type': 'loss', 'content': 0.0006919422303326428, 'timestamp': '2025-09-10 02:49:35.125440', 'step': 5071, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 656], 'flops': 13120079713856.0}, 'timestamp': '2025-09-10 02:49:35.225068', 'step': 5071, 'epoch': 3} +{'type': 'loss', 'content': 0.003406129078939557, 'timestamp': '2025-09-10 02:49:35.244392', 'step': 5072, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:49:35.309614', 'step': 5072, 'epoch': 3} +{'type': 'loss', 'content': 0.0001750012452248484, 'timestamp': '2025-09-10 02:49:35.330136', 'step': 5073, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:49:35.386273', 'step': 5073, 'epoch': 3} +{'type': 'loss', 'content': 0.000278674706351012, 'timestamp': '2025-09-10 02:49:35.389044', 'step': 5074, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:49:35.444267', 'step': 5074, 'epoch': 3} +{'type': 'loss', 'content': 0.0002639829181134701, 'timestamp': '2025-09-10 02:49:35.447791', 'step': 5075, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:49:35.505019', 'step': 5075, 'epoch': 3} +{'type': 'loss', 'content': 0.012780151329934597, 'timestamp': '2025-09-10 02:49:35.511265', 'step': 5076, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:49:35.572708', 'step': 5076, 'epoch': 3} +{'type': 'loss', 'content': 0.004734140355139971, 'timestamp': '2025-09-10 02:49:35.576691', 'step': 5077, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:49:35.637327', 'step': 5077, 'epoch': 3} +{'type': 'loss', 'content': 0.0007791816024109721, 'timestamp': '2025-09-10 02:49:35.647430', 'step': 5078, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:49:35.711728', 'step': 5078, 'epoch': 3} +{'type': 'loss', 'content': 0.00048580984002910554, 'timestamp': '2025-09-10 02:49:35.713947', 'step': 5079, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:49:35.771633', 'step': 5079, 'epoch': 3} +{'type': 'loss', 'content': 0.0017527417512610555, 'timestamp': '2025-09-10 02:49:35.777744', 'step': 5080, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:49:35.830808', 'step': 5080, 'epoch': 3} +{'type': 'loss', 'content': 0.001240090699866414, 'timestamp': '2025-09-10 02:49:35.833980', 'step': 5081, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:49:35.888008', 'step': 5081, 'epoch': 3} +{'type': 'loss', 'content': 0.012745247222483158, 'timestamp': '2025-09-10 02:49:35.890213', 'step': 5082, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:49:35.943378', 'step': 5082, 'epoch': 3} +{'type': 'loss', 'content': 0.0005714423605240881, 'timestamp': '2025-09-10 02:49:35.945703', 'step': 5083, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:49:35.999093', 'step': 5083, 'epoch': 3} +{'type': 'loss', 'content': 0.004364429507404566, 'timestamp': '2025-09-10 02:49:36.005279', 'step': 5084, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:49:36.058944', 'step': 5084, 'epoch': 3} +{'type': 'loss', 'content': 0.0006739782984368503, 'timestamp': '2025-09-10 02:49:36.061000', 'step': 5085, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:49:36.114132', 'step': 5085, 'epoch': 3} +{'type': 'loss', 'content': 0.015000805258750916, 'timestamp': '2025-09-10 02:49:36.122289', 'step': 5086, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:49:36.175309', 'step': 5086, 'epoch': 3} +{'type': 'loss', 'content': 0.0003139723849017173, 'timestamp': '2025-09-10 02:49:36.177362', 'step': 5087, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:49:36.231217', 'step': 5087, 'epoch': 3} +{'type': 'loss', 'content': 0.011213402263820171, 'timestamp': '2025-09-10 02:49:36.237485', 'step': 5088, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:49:36.290225', 'step': 5088, 'epoch': 3} +{'type': 'loss', 'content': 0.0011400616494938731, 'timestamp': '2025-09-10 02:49:36.292377', 'step': 5089, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:49:36.348266', 'step': 5089, 'epoch': 3} +{'type': 'loss', 'content': 0.010984824039041996, 'timestamp': '2025-09-10 02:49:36.358022', 'step': 5090, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 512], 'flops': 10240062230528.0}, 'timestamp': '2025-09-10 02:49:36.436135', 'step': 5090, 'epoch': 3} +{'type': 'loss', 'content': 0.0021188571117818356, 'timestamp': '2025-09-10 02:49:36.450188', 'step': 5091, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:49:36.504344', 'step': 5091, 'epoch': 3} +{'type': 'loss', 'content': 0.00044342773617245257, 'timestamp': '2025-09-10 02:49:36.510547', 'step': 5092, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:49:36.563058', 'step': 5092, 'epoch': 3} +{'type': 'loss', 'content': 0.00027311412850394845, 'timestamp': '2025-09-10 02:49:36.565069', 'step': 5093, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:49:36.617885', 'step': 5093, 'epoch': 3} +{'type': 'loss', 'content': 0.0017884830012917519, 'timestamp': '2025-09-10 02:49:36.620141', 'step': 5094, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:49:36.672704', 'step': 5094, 'epoch': 3} +{'type': 'loss', 'content': 0.0014469983289018273, 'timestamp': '2025-09-10 02:49:36.674937', 'step': 5095, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:49:36.727835', 'step': 5095, 'epoch': 3} +{'type': 'loss', 'content': 0.0005529725458472967, 'timestamp': '2025-09-10 02:49:36.734814', 'step': 5096, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:49:36.791895', 'step': 5096, 'epoch': 3} +{'type': 'loss', 'content': 0.0006478400900959969, 'timestamp': '2025-09-10 02:49:36.793897', 'step': 5097, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:49:36.847210', 'step': 5097, 'epoch': 3} +{'type': 'loss', 'content': 0.00015148741658776999, 'timestamp': '2025-09-10 02:49:36.853694', 'step': 5098, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:49:36.912311', 'step': 5098, 'epoch': 3} +{'type': 'loss', 'content': 0.008421264588832855, 'timestamp': '2025-09-10 02:49:36.922777', 'step': 5099, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:49:36.979476', 'step': 5099, 'epoch': 3} +{'type': 'loss', 'content': 0.0008443708647973835, 'timestamp': '2025-09-10 02:49:36.985399', 'step': 5100, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:49:37.040582', 'step': 5100, 'epoch': 3} +{'type': 'loss', 'content': 0.0007894421578384936, 'timestamp': '2025-09-10 02:49:37.043043', 'step': 5101, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:49:37.103138', 'step': 5101, 'epoch': 3} +{'type': 'loss', 'content': 0.00032058203942142427, 'timestamp': '2025-09-10 02:49:37.113851', 'step': 5102, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:49:37.167893', 'step': 5102, 'epoch': 3} +{'type': 'loss', 'content': 0.0037115856539458036, 'timestamp': '2025-09-10 02:49:37.171343', 'step': 5103, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:49:37.227299', 'step': 5103, 'epoch': 3} +{'type': 'loss', 'content': 0.0011774562299251556, 'timestamp': '2025-09-10 02:49:37.233368', 'step': 5104, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:49:37.286911', 'step': 5104, 'epoch': 3} +{'type': 'loss', 'content': 0.0008899167296476662, 'timestamp': '2025-09-10 02:49:37.294993', 'step': 5105, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:49:37.350669', 'step': 5105, 'epoch': 3} +{'type': 'loss', 'content': 0.008432075381278992, 'timestamp': '2025-09-10 02:49:37.353535', 'step': 5106, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:49:37.406386', 'step': 5106, 'epoch': 3} +{'type': 'loss', 'content': 0.05210445076227188, 'timestamp': '2025-09-10 02:49:37.412589', 'step': 5107, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:49:37.466468', 'step': 5107, 'epoch': 3} +{'type': 'loss', 'content': 0.000361327693099156, 'timestamp': '2025-09-10 02:49:37.478643', 'step': 5108, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:49:37.532028', 'step': 5108, 'epoch': 3} +{'type': 'loss', 'content': 0.00025633556651882827, 'timestamp': '2025-09-10 02:49:37.534060', 'step': 5109, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:49:37.588570', 'step': 5109, 'epoch': 3} +{'type': 'loss', 'content': 0.0002622759493533522, 'timestamp': '2025-09-10 02:49:37.598333', 'step': 5110, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:49:37.651638', 'step': 5110, 'epoch': 3} +{'type': 'loss', 'content': 0.00011523003922775388, 'timestamp': '2025-09-10 02:49:37.653826', 'step': 5111, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:49:37.709778', 'step': 5111, 'epoch': 3} +{'type': 'loss', 'content': 0.00017187556659337133, 'timestamp': '2025-09-10 02:49:37.715754', 'step': 5112, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:49:37.770046', 'step': 5112, 'epoch': 3} +{'type': 'loss', 'content': 0.0024591197725385427, 'timestamp': '2025-09-10 02:49:37.772102', 'step': 5113, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:49:37.824796', 'step': 5113, 'epoch': 3} +{'type': 'loss', 'content': 0.00047810334945097566, 'timestamp': '2025-09-10 02:49:37.827250', 'step': 5114, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:49:37.881585', 'step': 5114, 'epoch': 3} +{'type': 'loss', 'content': 0.005543294828385115, 'timestamp': '2025-09-10 02:49:37.883870', 'step': 5115, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:49:37.937084', 'step': 5115, 'epoch': 3} +{'type': 'loss', 'content': 0.003660088637843728, 'timestamp': '2025-09-10 02:49:37.944317', 'step': 5116, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:49:37.997548', 'step': 5116, 'epoch': 3} +{'type': 'loss', 'content': 0.0012102748733013868, 'timestamp': '2025-09-10 02:49:38.008053', 'step': 5117, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:49:38.061182', 'step': 5117, 'epoch': 3} +{'type': 'loss', 'content': 0.00013141623639967293, 'timestamp': '2025-09-10 02:49:38.063455', 'step': 5118, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:49:38.116627', 'step': 5118, 'epoch': 3} +{'type': 'loss', 'content': 0.0004812480474356562, 'timestamp': '2025-09-10 02:49:38.119063', 'step': 5119, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:49:38.171901', 'step': 5119, 'epoch': 3} +{'type': 'loss', 'content': 0.001012549502775073, 'timestamp': '2025-09-10 02:49:38.177969', 'step': 5120, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:49:38.230770', 'step': 5120, 'epoch': 3} +{'type': 'loss', 'content': 0.002350731287151575, 'timestamp': '2025-09-10 02:49:38.232896', 'step': 5121, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:49:38.286052', 'step': 5121, 'epoch': 3} +{'type': 'loss', 'content': 0.0006704625557176769, 'timestamp': '2025-09-10 02:49:38.295599', 'step': 5122, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:49:38.350779', 'step': 5122, 'epoch': 3} +{'type': 'loss', 'content': 0.0005529317422769964, 'timestamp': '2025-09-10 02:49:38.360523', 'step': 5123, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:49:38.414538', 'step': 5123, 'epoch': 3} +{'type': 'loss', 'content': 0.003123534144833684, 'timestamp': '2025-09-10 02:49:38.420821', 'step': 5124, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:49:38.473361', 'step': 5124, 'epoch': 3} +{'type': 'loss', 'content': 0.021540414541959763, 'timestamp': '2025-09-10 02:49:38.475574', 'step': 5125, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:49:38.528528', 'step': 5125, 'epoch': 3} +{'type': 'loss', 'content': 0.003263267921283841, 'timestamp': '2025-09-10 02:49:38.530986', 'step': 5126, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:49:38.584117', 'step': 5126, 'epoch': 3} +{'type': 'loss', 'content': 0.001052754931151867, 'timestamp': '2025-09-10 02:49:38.586157', 'step': 5127, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:49:38.639186', 'step': 5127, 'epoch': 3} +{'type': 'loss', 'content': 0.0006556085427291691, 'timestamp': '2025-09-10 02:49:38.645681', 'step': 5128, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:49:38.699377', 'step': 5128, 'epoch': 3} +{'type': 'loss', 'content': 0.0044709183275699615, 'timestamp': '2025-09-10 02:49:38.701597', 'step': 5129, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:49:38.755212', 'step': 5129, 'epoch': 3} +{'type': 'loss', 'content': 0.0005360793438740075, 'timestamp': '2025-09-10 02:49:38.761761', 'step': 5130, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:49:38.815166', 'step': 5130, 'epoch': 3} +{'type': 'loss', 'content': 0.0006453011883422732, 'timestamp': '2025-09-10 02:49:38.821525', 'step': 5131, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:49:38.875459', 'step': 5131, 'epoch': 3} +{'type': 'loss', 'content': 0.00035264246980659664, 'timestamp': '2025-09-10 02:49:38.881786', 'step': 5132, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:49:38.934396', 'step': 5132, 'epoch': 3} +{'type': 'loss', 'content': 0.012222418561577797, 'timestamp': '2025-09-10 02:49:38.940847', 'step': 5133, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:49:38.994385', 'step': 5133, 'epoch': 3} +{'type': 'loss', 'content': 0.026795705780386925, 'timestamp': '2025-09-10 02:49:39.000928', 'step': 5134, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:49:39.053772', 'step': 5134, 'epoch': 3} +{'type': 'loss', 'content': 0.008503907360136509, 'timestamp': '2025-09-10 02:49:39.061450', 'step': 5135, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:49:39.114124', 'step': 5135, 'epoch': 3} +{'type': 'loss', 'content': 0.0025715657975524664, 'timestamp': '2025-09-10 02:49:39.122672', 'step': 5136, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:49:39.175209', 'step': 5136, 'epoch': 3} +{'type': 'loss', 'content': 0.0005679276655428112, 'timestamp': '2025-09-10 02:49:39.181455', 'step': 5137, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:49:39.234465', 'step': 5137, 'epoch': 3} +{'type': 'loss', 'content': 0.01844605803489685, 'timestamp': '2025-09-10 02:49:39.237375', 'step': 5138, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:49:39.290362', 'step': 5138, 'epoch': 3} +{'type': 'loss', 'content': 0.000656695629004389, 'timestamp': '2025-09-10 02:49:39.292589', 'step': 5139, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:49:39.345960', 'step': 5139, 'epoch': 3} +{'type': 'loss', 'content': 0.0009054274414665997, 'timestamp': '2025-09-10 02:49:39.351903', 'step': 5140, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:49:39.404838', 'step': 5140, 'epoch': 3} +{'type': 'loss', 'content': 0.0066103278659284115, 'timestamp': '2025-09-10 02:49:39.406875', 'step': 5141, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:49:39.459287', 'step': 5141, 'epoch': 3} +{'type': 'loss', 'content': 0.0015177327441051602, 'timestamp': '2025-09-10 02:49:39.465887', 'step': 5142, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:49:39.520352', 'step': 5142, 'epoch': 3} +{'type': 'loss', 'content': 0.00022303589503280818, 'timestamp': '2025-09-10 02:49:39.528285', 'step': 5143, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:49:39.581502', 'step': 5143, 'epoch': 3} +{'type': 'loss', 'content': 0.00025539478519931436, 'timestamp': '2025-09-10 02:49:39.588646', 'step': 5144, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:49:39.641524', 'step': 5144, 'epoch': 3} +{'type': 'loss', 'content': 0.0026020356453955173, 'timestamp': '2025-09-10 02:49:39.651413', 'step': 5145, 'epoch': 3} +{'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 736], 'batch_size': 8, 'flops': 14691612894976}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10859018244352}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 624], 'batch_size': 8, 'flops': 12455932682112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11497784019456}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8623338031488}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8623338031488}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10859018244352}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 656], 'batch_size': 8, 'flops': 13094698457216}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 560], 'batch_size': 8, 'flops': 11178401131904}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11497784019456}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 1168], 'batch_size': 8, 'flops': 23314950858880}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 640], 'batch_size': 8, 'flops': 12775315569664}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [6, 208], 'batch_size': 8, 'flops': 4151977605760}], 'timestamp': '2025-09-10 02:49:56.557882', 'step': 5145, 'epoch': 3} +{'type': 'pplx', 'content': 20796887.005170308, 'timestamp': '2025-09-10 02:49:56.560640', 'step': 5145, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:49:56.614034', 'step': 5145, 'epoch': 3} +{'type': 'loss', 'content': 0.0010840559843927622, 'timestamp': '2025-09-10 02:49:56.616063', 'step': 5146, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:49:56.669698', 'step': 5146, 'epoch': 3} +{'type': 'loss', 'content': 0.04474133253097534, 'timestamp': '2025-09-10 02:49:56.671897', 'step': 5147, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:49:56.730900', 'step': 5147, 'epoch': 3} +{'type': 'loss', 'content': 0.0041691334918141365, 'timestamp': '2025-09-10 02:49:56.737974', 'step': 5148, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:49:56.793665', 'step': 5148, 'epoch': 3} +{'type': 'loss', 'content': 0.00019682837591972202, 'timestamp': '2025-09-10 02:49:56.795627', 'step': 5149, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:49:56.848385', 'step': 5149, 'epoch': 3} +{'type': 'loss', 'content': 0.014704102650284767, 'timestamp': '2025-09-10 02:49:56.850619', 'step': 5150, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:49:56.907156', 'step': 5150, 'epoch': 3} +{'type': 'loss', 'content': 8.698314923094586e-05, 'timestamp': '2025-09-10 02:49:56.916709', 'step': 5151, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:49:56.977645', 'step': 5151, 'epoch': 3} +{'type': 'loss', 'content': 0.029849465936422348, 'timestamp': '2025-09-10 02:49:56.988885', 'step': 5152, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:49:57.042645', 'step': 5152, 'epoch': 3} +{'type': 'loss', 'content': 0.0014911588514223695, 'timestamp': '2025-09-10 02:49:57.044668', 'step': 5153, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:49:57.097245', 'step': 5153, 'epoch': 3} +{'type': 'loss', 'content': 0.0010748127242550254, 'timestamp': '2025-09-10 02:49:57.103951', 'step': 5154, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:49:57.157188', 'step': 5154, 'epoch': 3} +{'type': 'loss', 'content': 0.00036259947228245437, 'timestamp': '2025-09-10 02:49:57.159313', 'step': 5155, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:49:57.212485', 'step': 5155, 'epoch': 3} +{'type': 'loss', 'content': 0.001834212918765843, 'timestamp': '2025-09-10 02:49:57.218823', 'step': 5156, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:49:57.272654', 'step': 5156, 'epoch': 3} +{'type': 'loss', 'content': 0.0004516570770647377, 'timestamp': '2025-09-10 02:49:57.274763', 'step': 5157, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:49:57.327689', 'step': 5157, 'epoch': 3} +{'type': 'loss', 'content': 0.00803940836340189, 'timestamp': '2025-09-10 02:49:57.330069', 'step': 5158, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-09-10 02:49:57.391438', 'step': 5158, 'epoch': 3} +{'type': 'loss', 'content': 0.007134335581213236, 'timestamp': '2025-09-10 02:49:57.402545', 'step': 5159, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:49:57.455351', 'step': 5159, 'epoch': 3} +{'type': 'loss', 'content': 0.022051816806197166, 'timestamp': '2025-09-10 02:49:57.461261', 'step': 5160, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:49:57.513678', 'step': 5160, 'epoch': 3} +{'type': 'loss', 'content': 0.0005428260774351656, 'timestamp': '2025-09-10 02:49:57.516241', 'step': 5161, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:49:57.581565', 'step': 5161, 'epoch': 3} +{'type': 'loss', 'content': 9.055736882146448e-05, 'timestamp': '2025-09-10 02:49:57.591376', 'step': 5162, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:49:57.644310', 'step': 5162, 'epoch': 3} +{'type': 'loss', 'content': 0.0029876951593905687, 'timestamp': '2025-09-10 02:49:57.652319', 'step': 5163, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:49:57.705694', 'step': 5163, 'epoch': 3} +{'type': 'loss', 'content': 0.01764310896396637, 'timestamp': '2025-09-10 02:49:57.715231', 'step': 5164, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:49:57.772716', 'step': 5164, 'epoch': 3} +{'type': 'loss', 'content': 0.04342194274067879, 'timestamp': '2025-09-10 02:49:57.774656', 'step': 5165, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:49:57.826886', 'step': 5165, 'epoch': 3} +{'type': 'loss', 'content': 0.001361071947030723, 'timestamp': '2025-09-10 02:49:57.829071', 'step': 5166, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:49:57.882040', 'step': 5166, 'epoch': 3} +{'type': 'loss', 'content': 0.0007704917225055397, 'timestamp': '2025-09-10 02:49:57.884018', 'step': 5167, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:49:57.938939', 'step': 5167, 'epoch': 3} +{'type': 'loss', 'content': 0.0030907695181667805, 'timestamp': '2025-09-10 02:49:57.949519', 'step': 5168, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:49:58.010035', 'step': 5168, 'epoch': 3} +{'type': 'loss', 'content': 0.008091972209513187, 'timestamp': '2025-09-10 02:49:58.016219', 'step': 5169, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:49:58.072195', 'step': 5169, 'epoch': 3} +{'type': 'loss', 'content': 0.0026753931306302547, 'timestamp': '2025-09-10 02:49:58.074017', 'step': 5170, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:49:58.130740', 'step': 5170, 'epoch': 3} +{'type': 'loss', 'content': 0.003592865541577339, 'timestamp': '2025-09-10 02:49:58.133734', 'step': 5171, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:49:58.186135', 'step': 5171, 'epoch': 3} +{'type': 'loss', 'content': 0.00043954423745162785, 'timestamp': '2025-09-10 02:49:58.191987', 'step': 5172, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:49:58.246496', 'step': 5172, 'epoch': 3} +{'type': 'loss', 'content': 0.00014983757864683867, 'timestamp': '2025-09-10 02:49:58.249424', 'step': 5173, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:49:58.302483', 'step': 5173, 'epoch': 3} +{'type': 'loss', 'content': 0.003581056371331215, 'timestamp': '2025-09-10 02:49:58.304496', 'step': 5174, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:49:58.356780', 'step': 5174, 'epoch': 3} +{'type': 'loss', 'content': 0.0002480275579728186, 'timestamp': '2025-09-10 02:49:58.364613', 'step': 5175, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:49:58.421891', 'step': 5175, 'epoch': 3} +{'type': 'loss', 'content': 0.0011433314066380262, 'timestamp': '2025-09-10 02:49:58.428837', 'step': 5176, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:49:58.485399', 'step': 5176, 'epoch': 3} +{'type': 'loss', 'content': 0.000771212566178292, 'timestamp': '2025-09-10 02:49:58.496647', 'step': 5177, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:49:58.552360', 'step': 5177, 'epoch': 3} +{'type': 'loss', 'content': 0.0605146549642086, 'timestamp': '2025-09-10 02:49:58.561916', 'step': 5178, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:49:58.621368', 'step': 5178, 'epoch': 3} +{'type': 'loss', 'content': 0.00782932247966528, 'timestamp': '2025-09-10 02:49:58.623300', 'step': 5179, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:49:58.676099', 'step': 5179, 'epoch': 3} +{'type': 'loss', 'content': 0.001462681801058352, 'timestamp': '2025-09-10 02:49:58.682330', 'step': 5180, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:49:58.737098', 'step': 5180, 'epoch': 3} +{'type': 'loss', 'content': 0.009882143698632717, 'timestamp': '2025-09-10 02:49:58.739188', 'step': 5181, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:49:58.792230', 'step': 5181, 'epoch': 3} +{'type': 'loss', 'content': 0.0024378912057727575, 'timestamp': '2025-09-10 02:49:58.800284', 'step': 5182, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:49:58.853367', 'step': 5182, 'epoch': 3} +{'type': 'loss', 'content': 0.0007555658230558038, 'timestamp': '2025-09-10 02:49:58.859737', 'step': 5183, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:49:58.913253', 'step': 5183, 'epoch': 3} +{'type': 'loss', 'content': 0.0016349944053217769, 'timestamp': '2025-09-10 02:49:58.919201', 'step': 5184, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:49:58.971661', 'step': 5184, 'epoch': 3} +{'type': 'loss', 'content': 0.0012841911520808935, 'timestamp': '2025-09-10 02:49:58.978305', 'step': 5185, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:49:59.030423', 'step': 5185, 'epoch': 3} +{'type': 'loss', 'content': 0.0008138803532347083, 'timestamp': '2025-09-10 02:49:59.032522', 'step': 5186, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:49:59.084873', 'step': 5186, 'epoch': 3} +{'type': 'loss', 'content': 0.005838603712618351, 'timestamp': '2025-09-10 02:49:59.087178', 'step': 5187, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:49:59.140050', 'step': 5187, 'epoch': 3} +{'type': 'loss', 'content': 0.0002849035372491926, 'timestamp': '2025-09-10 02:49:59.145860', 'step': 5188, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:49:59.198381', 'step': 5188, 'epoch': 3} +{'type': 'loss', 'content': 0.0014062098925933242, 'timestamp': '2025-09-10 02:49:59.201128', 'step': 5189, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:49:59.254421', 'step': 5189, 'epoch': 3} +{'type': 'loss', 'content': 0.01566436141729355, 'timestamp': '2025-09-10 02:49:59.256429', 'step': 5190, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:49:59.308812', 'step': 5190, 'epoch': 3} +{'type': 'loss', 'content': 0.0013595082564279437, 'timestamp': '2025-09-10 02:49:59.310960', 'step': 5191, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:49:59.363460', 'step': 5191, 'epoch': 3} +{'type': 'loss', 'content': 0.016373340040445328, 'timestamp': '2025-09-10 02:49:59.369093', 'step': 5192, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:49:59.421187', 'step': 5192, 'epoch': 3} +{'type': 'loss', 'content': 0.011580752208828926, 'timestamp': '2025-09-10 02:49:59.427636', 'step': 5193, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:49:59.480557', 'step': 5193, 'epoch': 3} +{'type': 'loss', 'content': 0.0003131380653940141, 'timestamp': '2025-09-10 02:49:59.482837', 'step': 5194, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:49:59.535218', 'step': 5194, 'epoch': 3} +{'type': 'loss', 'content': 0.0005036334623582661, 'timestamp': '2025-09-10 02:49:59.537122', 'step': 5195, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:49:59.589263', 'step': 5195, 'epoch': 3} +{'type': 'loss', 'content': 0.0003483194450382143, 'timestamp': '2025-09-10 02:49:59.595044', 'step': 5196, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:49:59.647143', 'step': 5196, 'epoch': 3} +{'type': 'loss', 'content': 0.0005673426203429699, 'timestamp': '2025-09-10 02:49:59.649167', 'step': 5197, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:49:59.701577', 'step': 5197, 'epoch': 3} +{'type': 'loss', 'content': 0.0017793395090848207, 'timestamp': '2025-09-10 02:49:59.708101', 'step': 5198, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:49:59.760737', 'step': 5198, 'epoch': 3} +{'type': 'loss', 'content': 0.01295098103582859, 'timestamp': '2025-09-10 02:49:59.762776', 'step': 5199, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-09-10 02:49:59.829216', 'step': 5199, 'epoch': 3} +{'type': 'loss', 'content': 0.0029301675967872143, 'timestamp': '2025-09-10 02:49:59.842246', 'step': 5200, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:49:59.897251', 'step': 5200, 'epoch': 3} +{'type': 'loss', 'content': 0.0006738324882462621, 'timestamp': '2025-09-10 02:49:59.899397', 'step': 5201, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:49:59.952762', 'step': 5201, 'epoch': 3} +{'type': 'loss', 'content': 0.02316547930240631, 'timestamp': '2025-09-10 02:49:59.955069', 'step': 5202, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:50:00.008209', 'step': 5202, 'epoch': 3} +{'type': 'loss', 'content': 0.002208786318078637, 'timestamp': '2025-09-10 02:50:00.011364', 'step': 5203, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-09-10 02:50:00.079789', 'step': 5203, 'epoch': 3} +{'type': 'loss', 'content': 0.032139282673597336, 'timestamp': '2025-09-10 02:50:00.093240', 'step': 5204, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:50:00.145309', 'step': 5204, 'epoch': 3} +{'type': 'loss', 'content': 0.0005456877988763154, 'timestamp': '2025-09-10 02:50:00.147393', 'step': 5205, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:50:00.207882', 'step': 5205, 'epoch': 3} +{'type': 'loss', 'content': 0.0021430044434964657, 'timestamp': '2025-09-10 02:50:00.218550', 'step': 5206, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:50:00.270891', 'step': 5206, 'epoch': 3} +{'type': 'loss', 'content': 0.000990464468486607, 'timestamp': '2025-09-10 02:50:00.274024', 'step': 5207, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:50:00.326898', 'step': 5207, 'epoch': 3} +{'type': 'loss', 'content': 0.0004958632052876055, 'timestamp': '2025-09-10 02:50:00.332614', 'step': 5208, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:50:00.384371', 'step': 5208, 'epoch': 3} +{'type': 'loss', 'content': 0.0046188137494027615, 'timestamp': '2025-09-10 02:50:00.386672', 'step': 5209, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:50:00.439418', 'step': 5209, 'epoch': 3} +{'type': 'loss', 'content': 0.005221318919211626, 'timestamp': '2025-09-10 02:50:00.441673', 'step': 5210, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:50:00.493962', 'step': 5210, 'epoch': 3} +{'type': 'loss', 'content': 0.004844017326831818, 'timestamp': '2025-09-10 02:50:00.496021', 'step': 5211, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-09-10 02:50:00.564060', 'step': 5211, 'epoch': 3} +{'type': 'loss', 'content': 0.002229286590591073, 'timestamp': '2025-09-10 02:50:00.577423', 'step': 5212, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:00.629166', 'step': 5212, 'epoch': 3} +{'type': 'loss', 'content': 0.0018672727746888995, 'timestamp': '2025-09-10 02:50:00.631353', 'step': 5213, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:50:00.684131', 'step': 5213, 'epoch': 3} +{'type': 'loss', 'content': 0.01097164861857891, 'timestamp': '2025-09-10 02:50:00.686296', 'step': 5214, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:50:00.740038', 'step': 5214, 'epoch': 3} +{'type': 'loss', 'content': 0.0007613471825607121, 'timestamp': '2025-09-10 02:50:00.742221', 'step': 5215, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:50:00.795352', 'step': 5215, 'epoch': 3} +{'type': 'loss', 'content': 0.0011177508858963847, 'timestamp': '2025-09-10 02:50:00.802479', 'step': 5216, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:50:00.855524', 'step': 5216, 'epoch': 3} +{'type': 'loss', 'content': 0.010218006558716297, 'timestamp': '2025-09-10 02:50:00.857834', 'step': 5217, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:50:00.911164', 'step': 5217, 'epoch': 3} +{'type': 'loss', 'content': 0.015347718261182308, 'timestamp': '2025-09-10 02:50:00.917741', 'step': 5218, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-09-10 02:50:00.978650', 'step': 5218, 'epoch': 3} +{'type': 'loss', 'content': 0.009092840366065502, 'timestamp': '2025-09-10 02:50:00.989532', 'step': 5219, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:50:01.042548', 'step': 5219, 'epoch': 3} +{'type': 'loss', 'content': 0.040490929037332535, 'timestamp': '2025-09-10 02:50:01.048814', 'step': 5220, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:50:01.100726', 'step': 5220, 'epoch': 3} +{'type': 'loss', 'content': 0.003533871378749609, 'timestamp': '2025-09-10 02:50:01.103664', 'step': 5221, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:50:01.158749', 'step': 5221, 'epoch': 3} +{'type': 'loss', 'content': 0.0028786794282495975, 'timestamp': '2025-09-10 02:50:01.161755', 'step': 5222, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:50:01.215246', 'step': 5222, 'epoch': 3} +{'type': 'loss', 'content': 0.002021850785240531, 'timestamp': '2025-09-10 02:50:01.224853', 'step': 5223, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:50:01.278956', 'step': 5223, 'epoch': 3} +{'type': 'loss', 'content': 0.019813692197203636, 'timestamp': '2025-09-10 02:50:01.289518', 'step': 5224, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-09-10 02:50:01.348659', 'step': 5224, 'epoch': 3} +{'type': 'loss', 'content': 0.005793183110654354, 'timestamp': '2025-09-10 02:50:01.360437', 'step': 5225, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:50:01.414065', 'step': 5225, 'epoch': 3} +{'type': 'loss', 'content': 0.012712853029370308, 'timestamp': '2025-09-10 02:50:01.420353', 'step': 5226, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:50:01.473476', 'step': 5226, 'epoch': 3} +{'type': 'loss', 'content': 0.0008272496052086353, 'timestamp': '2025-09-10 02:50:01.476388', 'step': 5227, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-09-10 02:50:01.544128', 'step': 5227, 'epoch': 3} +{'type': 'loss', 'content': 0.0072872755117714405, 'timestamp': '2025-09-10 02:50:01.557479', 'step': 5228, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:50:01.609647', 'step': 5228, 'epoch': 3} +{'type': 'loss', 'content': 0.005303137004375458, 'timestamp': '2025-09-10 02:50:01.611446', 'step': 5229, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:50:01.672045', 'step': 5229, 'epoch': 3} +{'type': 'loss', 'content': 0.0010862888302654028, 'timestamp': '2025-09-10 02:50:01.682804', 'step': 5230, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:50:01.741629', 'step': 5230, 'epoch': 3} +{'type': 'loss', 'content': 0.03051486611366272, 'timestamp': '2025-09-10 02:50:01.752015', 'step': 5231, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:50:01.805061', 'step': 5231, 'epoch': 3} +{'type': 'loss', 'content': 0.011646350845694542, 'timestamp': '2025-09-10 02:50:01.810819', 'step': 5232, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:50:01.863905', 'step': 5232, 'epoch': 3} +{'type': 'loss', 'content': 0.0015296988422051072, 'timestamp': '2025-09-10 02:50:01.872272', 'step': 5233, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:50:01.925014', 'step': 5233, 'epoch': 3} +{'type': 'loss', 'content': 0.009831218980252743, 'timestamp': '2025-09-10 02:50:01.926787', 'step': 5234, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:50:01.979186', 'step': 5234, 'epoch': 3} +{'type': 'loss', 'content': 0.003388639772310853, 'timestamp': '2025-09-10 02:50:01.980949', 'step': 5235, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:50:02.033555', 'step': 5235, 'epoch': 3} +{'type': 'loss', 'content': 0.0012169089168310165, 'timestamp': '2025-09-10 02:50:02.039207', 'step': 5236, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:50:02.091699', 'step': 5236, 'epoch': 3} +{'type': 'loss', 'content': 0.0015258780913427472, 'timestamp': '2025-09-10 02:50:02.102000', 'step': 5237, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:50:02.155402', 'step': 5237, 'epoch': 3} +{'type': 'loss', 'content': 0.0004886474343948066, 'timestamp': '2025-09-10 02:50:02.158507', 'step': 5238, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:50:02.213982', 'step': 5238, 'epoch': 3} +{'type': 'loss', 'content': 0.0014229947701096535, 'timestamp': '2025-09-10 02:50:02.216008', 'step': 5239, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:50:02.268360', 'step': 5239, 'epoch': 3} +{'type': 'loss', 'content': 0.020801449194550514, 'timestamp': '2025-09-10 02:50:02.274068', 'step': 5240, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:50:02.326221', 'step': 5240, 'epoch': 3} +{'type': 'loss', 'content': 0.002947020810097456, 'timestamp': '2025-09-10 02:50:02.328188', 'step': 5241, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:50:02.380774', 'step': 5241, 'epoch': 3} +{'type': 'loss', 'content': 0.006273990031331778, 'timestamp': '2025-09-10 02:50:02.387519', 'step': 5242, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:50:02.440336', 'step': 5242, 'epoch': 3} +{'type': 'loss', 'content': 0.0004903948865830898, 'timestamp': '2025-09-10 02:50:02.442432', 'step': 5243, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:50:02.494775', 'step': 5243, 'epoch': 3} +{'type': 'loss', 'content': 0.004805770702660084, 'timestamp': '2025-09-10 02:50:02.500368', 'step': 5244, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:50:02.552023', 'step': 5244, 'epoch': 3} +{'type': 'loss', 'content': 0.0008763536461628973, 'timestamp': '2025-09-10 02:50:02.562236', 'step': 5245, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:50:02.615075', 'step': 5245, 'epoch': 3} +{'type': 'loss', 'content': 0.0014887356664985418, 'timestamp': '2025-09-10 02:50:02.618089', 'step': 5246, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 480], 'flops': 9600058345344.0}, 'timestamp': '2025-09-10 02:50:02.691564', 'step': 5246, 'epoch': 3} +{'type': 'loss', 'content': 0.0012682790402323008, 'timestamp': '2025-09-10 02:50:02.705229', 'step': 5247, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:50:02.760656', 'step': 5247, 'epoch': 3} +{'type': 'loss', 'content': 0.006945140194147825, 'timestamp': '2025-09-10 02:50:02.766329', 'step': 5248, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:50:02.818200', 'step': 5248, 'epoch': 3} +{'type': 'loss', 'content': 0.0011602779850363731, 'timestamp': '2025-09-10 02:50:02.820130', 'step': 5249, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 688], 'flops': 13760083599040.0}, 'timestamp': '2025-09-10 02:50:02.919393', 'step': 5249, 'epoch': 3} +{'type': 'loss', 'content': 0.0011014706688001752, 'timestamp': '2025-09-10 02:50:02.938681', 'step': 5250, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:50:02.991211', 'step': 5250, 'epoch': 3} +{'type': 'loss', 'content': 0.0003870258224196732, 'timestamp': '2025-09-10 02:50:02.993245', 'step': 5251, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:50:03.045795', 'step': 5251, 'epoch': 3} +{'type': 'loss', 'content': 0.0027917753905057907, 'timestamp': '2025-09-10 02:50:03.051352', 'step': 5252, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:50:03.103503', 'step': 5252, 'epoch': 3} +{'type': 'loss', 'content': 0.0017253583064302802, 'timestamp': '2025-09-10 02:50:03.105529', 'step': 5253, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:50:03.157895', 'step': 5253, 'epoch': 3} +{'type': 'loss', 'content': 0.0008569721831008792, 'timestamp': '2025-09-10 02:50:03.160570', 'step': 5254, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:50:03.214432', 'step': 5254, 'epoch': 3} +{'type': 'loss', 'content': 0.0019780828151851892, 'timestamp': '2025-09-10 02:50:03.224072', 'step': 5255, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:50:03.278397', 'step': 5255, 'epoch': 3} +{'type': 'loss', 'content': 0.001109988079406321, 'timestamp': '2025-09-10 02:50:03.283956', 'step': 5256, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:50:03.337577', 'step': 5256, 'epoch': 3} +{'type': 'loss', 'content': 0.010690425522625446, 'timestamp': '2025-09-10 02:50:03.339686', 'step': 5257, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-09-10 02:50:03.406882', 'step': 5257, 'epoch': 3} +{'type': 'loss', 'content': 0.0007719636196270585, 'timestamp': '2025-09-10 02:50:03.419075', 'step': 5258, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:50:03.474132', 'step': 5258, 'epoch': 3} +{'type': 'loss', 'content': 0.013832383789122105, 'timestamp': '2025-09-10 02:50:03.476227', 'step': 5259, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 560], 'flops': 11200068058304.0}, 'timestamp': '2025-09-10 02:50:03.559700', 'step': 5259, 'epoch': 3} +{'type': 'loss', 'content': 0.0005243135965429246, 'timestamp': '2025-09-10 02:50:03.575875', 'step': 5260, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:50:03.628883', 'step': 5260, 'epoch': 3} +{'type': 'loss', 'content': 0.0010769384680315852, 'timestamp': '2025-09-10 02:50:03.631037', 'step': 5261, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:50:03.683458', 'step': 5261, 'epoch': 3} +{'type': 'loss', 'content': 0.0008988159825094044, 'timestamp': '2025-09-10 02:50:03.690163', 'step': 5262, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:50:03.745355', 'step': 5262, 'epoch': 3} +{'type': 'loss', 'content': 0.001947202137671411, 'timestamp': '2025-09-10 02:50:03.747503', 'step': 5263, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:50:03.800540', 'step': 5263, 'epoch': 3} +{'type': 'loss', 'content': 0.0004767652426380664, 'timestamp': '2025-09-10 02:50:03.810382', 'step': 5264, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:50:03.863074', 'step': 5264, 'epoch': 3} +{'type': 'loss', 'content': 0.0008745061932131648, 'timestamp': '2025-09-10 02:50:03.873562', 'step': 5265, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:50:03.927947', 'step': 5265, 'epoch': 3} +{'type': 'loss', 'content': 0.0003695491177495569, 'timestamp': '2025-09-10 02:50:03.937516', 'step': 5266, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:50:03.996051', 'step': 5266, 'epoch': 3} +{'type': 'loss', 'content': 0.004437750205397606, 'timestamp': '2025-09-10 02:50:03.998048', 'step': 5267, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:50:04.051394', 'step': 5267, 'epoch': 3} +{'type': 'loss', 'content': 0.0095591489225626, 'timestamp': '2025-09-10 02:50:04.061750', 'step': 5268, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:50:04.113934', 'step': 5268, 'epoch': 3} +{'type': 'loss', 'content': 0.0005434318445622921, 'timestamp': '2025-09-10 02:50:04.115969', 'step': 5269, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:50:04.169009', 'step': 5269, 'epoch': 3} +{'type': 'loss', 'content': 0.0002766565012279898, 'timestamp': '2025-09-10 02:50:04.171104', 'step': 5270, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:50:04.223379', 'step': 5270, 'epoch': 3} +{'type': 'loss', 'content': 0.00032608662149868906, 'timestamp': '2025-09-10 02:50:04.225431', 'step': 5271, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-09-10 02:50:04.286624', 'step': 5271, 'epoch': 3} +{'type': 'loss', 'content': 0.00018476485274732113, 'timestamp': '2025-09-10 02:50:04.298482', 'step': 5272, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:50:04.350697', 'step': 5272, 'epoch': 3} +{'type': 'loss', 'content': 0.050519172102212906, 'timestamp': '2025-09-10 02:50:04.352929', 'step': 5273, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:50:04.405265', 'step': 5273, 'epoch': 3} +{'type': 'loss', 'content': 0.01290177647024393, 'timestamp': '2025-09-10 02:50:04.407599', 'step': 5274, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:50:04.460456', 'step': 5274, 'epoch': 3} +{'type': 'loss', 'content': 0.00845426507294178, 'timestamp': '2025-09-10 02:50:04.462593', 'step': 5275, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:50:04.515600', 'step': 5275, 'epoch': 3} +{'type': 'loss', 'content': 0.0026969367172569036, 'timestamp': '2025-09-10 02:50:04.526002', 'step': 5276, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-09-10 02:50:04.585166', 'step': 5276, 'epoch': 3} +{'type': 'loss', 'content': 0.0003303184930700809, 'timestamp': '2025-09-10 02:50:04.596920', 'step': 5277, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:50:04.649658', 'step': 5277, 'epoch': 3} +{'type': 'loss', 'content': 0.0027105682529509068, 'timestamp': '2025-09-10 02:50:04.651843', 'step': 5278, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:50:04.704198', 'step': 5278, 'epoch': 3} +{'type': 'loss', 'content': 0.00017241363821085542, 'timestamp': '2025-09-10 02:50:04.706185', 'step': 5279, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:50:04.759121', 'step': 5279, 'epoch': 3} +{'type': 'loss', 'content': 0.005337646696716547, 'timestamp': '2025-09-10 02:50:04.768050', 'step': 5280, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:50:04.820226', 'step': 5280, 'epoch': 3} +{'type': 'loss', 'content': 0.0004308401548769325, 'timestamp': '2025-09-10 02:50:04.822274', 'step': 5281, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:50:04.875457', 'step': 5281, 'epoch': 3} +{'type': 'loss', 'content': 0.00246291421353817, 'timestamp': '2025-09-10 02:50:04.877576', 'step': 5282, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:50:04.929980', 'step': 5282, 'epoch': 3} +{'type': 'loss', 'content': 0.0011630167718976736, 'timestamp': '2025-09-10 02:50:04.932070', 'step': 5283, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:50:04.984676', 'step': 5283, 'epoch': 3} +{'type': 'loss', 'content': 0.0007223097491078079, 'timestamp': '2025-09-10 02:50:04.992056', 'step': 5284, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:50:05.044053', 'step': 5284, 'epoch': 3} +{'type': 'loss', 'content': 0.017209595069289207, 'timestamp': '2025-09-10 02:50:05.046108', 'step': 5285, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:50:05.098883', 'step': 5285, 'epoch': 3} +{'type': 'loss', 'content': 0.007841082289814949, 'timestamp': '2025-09-10 02:50:05.100983', 'step': 5286, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 464], 'flops': 9280056402752.0}, 'timestamp': '2025-09-10 02:50:05.172825', 'step': 5286, 'epoch': 3} +{'type': 'loss', 'content': 0.009586544707417488, 'timestamp': '2025-09-10 02:50:05.186310', 'step': 5287, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:50:05.241373', 'step': 5287, 'epoch': 3} +{'type': 'loss', 'content': 0.012090668082237244, 'timestamp': '2025-09-10 02:50:05.251939', 'step': 5288, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:50:05.304447', 'step': 5288, 'epoch': 3} +{'type': 'loss', 'content': 0.0006836647517047822, 'timestamp': '2025-09-10 02:50:05.306611', 'step': 5289, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:50:05.359060', 'step': 5289, 'epoch': 3} +{'type': 'loss', 'content': 0.011419111862778664, 'timestamp': '2025-09-10 02:50:05.361316', 'step': 5290, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:50:05.413974', 'step': 5290, 'epoch': 3} +{'type': 'loss', 'content': 0.0009466545889154077, 'timestamp': '2025-09-10 02:50:05.422360', 'step': 5291, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:50:05.480355', 'step': 5291, 'epoch': 3} +{'type': 'loss', 'content': 0.024271532893180847, 'timestamp': '2025-09-10 02:50:05.491536', 'step': 5292, 'epoch': 3} +{'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 736], 'batch_size': 8, 'flops': 14691612894976}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10859018244352}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 624], 'batch_size': 8, 'flops': 12455932682112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11497784019456}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8623338031488}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8623338031488}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10859018244352}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 656], 'batch_size': 8, 'flops': 13094698457216}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 560], 'batch_size': 8, 'flops': 11178401131904}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11497784019456}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 1168], 'batch_size': 8, 'flops': 23314950858880}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 640], 'batch_size': 8, 'flops': 12775315569664}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [6, 208], 'batch_size': 8, 'flops': 4151977605760}], 'timestamp': '2025-09-10 02:50:22.271618', 'step': 5292, 'epoch': 3} +{'type': 'pplx', 'content': 18397125.397900093, 'timestamp': '2025-09-10 02:50:22.274512', 'step': 5292, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:50:22.327638', 'step': 5292, 'epoch': 3} +{'type': 'loss', 'content': 0.0011591921793296933, 'timestamp': '2025-09-10 02:50:22.329836', 'step': 5293, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:50:22.383701', 'step': 5293, 'epoch': 3} +{'type': 'loss', 'content': 0.004721366334706545, 'timestamp': '2025-09-10 02:50:22.389465', 'step': 5294, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:50:22.443783', 'step': 5294, 'epoch': 3} +{'type': 'loss', 'content': 0.0016440298641100526, 'timestamp': '2025-09-10 02:50:22.446135', 'step': 5295, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:50:22.499553', 'step': 5295, 'epoch': 3} +{'type': 'loss', 'content': 0.005809945520013571, 'timestamp': '2025-09-10 02:50:22.505776', 'step': 5296, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:50:22.558844', 'step': 5296, 'epoch': 3} +{'type': 'loss', 'content': 0.003539426252245903, 'timestamp': '2025-09-10 02:50:22.561192', 'step': 5297, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:50:22.616786', 'step': 5297, 'epoch': 3} +{'type': 'loss', 'content': 0.01256350614130497, 'timestamp': '2025-09-10 02:50:22.626384', 'step': 5298, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:50:22.681203', 'step': 5298, 'epoch': 3} +{'type': 'loss', 'content': 0.0010120259830728173, 'timestamp': '2025-09-10 02:50:22.683406', 'step': 5299, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:50:22.736971', 'step': 5299, 'epoch': 3} +{'type': 'loss', 'content': 0.0008106070454232395, 'timestamp': '2025-09-10 02:50:22.745796', 'step': 5300, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:50:22.799346', 'step': 5300, 'epoch': 3} +{'type': 'loss', 'content': 0.009015606716275215, 'timestamp': '2025-09-10 02:50:22.801735', 'step': 5301, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:50:22.855084', 'step': 5301, 'epoch': 3} +{'type': 'loss', 'content': 0.003921045456081629, 'timestamp': '2025-09-10 02:50:22.857952', 'step': 5302, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 816], 'flops': 16320099139776.0}, 'timestamp': '2025-09-10 02:50:22.981061', 'step': 5302, 'epoch': 3} +{'type': 'loss', 'content': 0.0001959327346412465, 'timestamp': '2025-09-10 02:50:23.004068', 'step': 5303, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:50:23.062657', 'step': 5303, 'epoch': 3} +{'type': 'loss', 'content': 0.00602575670927763, 'timestamp': '2025-09-10 02:50:23.069013', 'step': 5304, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:50:23.121788', 'step': 5304, 'epoch': 3} +{'type': 'loss', 'content': 0.00045856714132241905, 'timestamp': '2025-09-10 02:50:23.128180', 'step': 5305, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:50:23.182475', 'step': 5305, 'epoch': 3} +{'type': 'loss', 'content': 0.004988010041415691, 'timestamp': '2025-09-10 02:50:23.192070', 'step': 5306, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:50:23.245894', 'step': 5306, 'epoch': 3} +{'type': 'loss', 'content': 0.0001976949133677408, 'timestamp': '2025-09-10 02:50:23.248123', 'step': 5307, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:50:23.301395', 'step': 5307, 'epoch': 3} +{'type': 'loss', 'content': 0.002062815008684993, 'timestamp': '2025-09-10 02:50:23.307758', 'step': 5308, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:50:23.360894', 'step': 5308, 'epoch': 3} +{'type': 'loss', 'content': 0.0031036154832690954, 'timestamp': '2025-09-10 02:50:23.363295', 'step': 5309, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-09-10 02:50:23.426194', 'step': 5309, 'epoch': 3} +{'type': 'loss', 'content': 0.0016385355265811086, 'timestamp': '2025-09-10 02:50:23.437294', 'step': 5310, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:50:23.490665', 'step': 5310, 'epoch': 3} +{'type': 'loss', 'content': 0.0016528499545529485, 'timestamp': '2025-09-10 02:50:23.492734', 'step': 5311, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:50:23.546085', 'step': 5311, 'epoch': 3} +{'type': 'loss', 'content': 0.014376426115632057, 'timestamp': '2025-09-10 02:50:23.552156', 'step': 5312, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:50:23.604568', 'step': 5312, 'epoch': 3} +{'type': 'loss', 'content': 0.0029676572885364294, 'timestamp': '2025-09-10 02:50:23.607660', 'step': 5313, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:50:23.660617', 'step': 5313, 'epoch': 3} +{'type': 'loss', 'content': 0.0019673847127705812, 'timestamp': '2025-09-10 02:50:23.663009', 'step': 5314, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:50:23.716001', 'step': 5314, 'epoch': 3} +{'type': 'loss', 'content': 0.0012650678399950266, 'timestamp': '2025-09-10 02:50:23.718546', 'step': 5315, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:50:23.772121', 'step': 5315, 'epoch': 3} +{'type': 'loss', 'content': 0.004298098385334015, 'timestamp': '2025-09-10 02:50:23.778304', 'step': 5316, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:50:23.831013', 'step': 5316, 'epoch': 3} +{'type': 'loss', 'content': 0.00012793357018381357, 'timestamp': '2025-09-10 02:50:23.840855', 'step': 5317, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:50:23.895615', 'step': 5317, 'epoch': 3} +{'type': 'loss', 'content': 0.0012616243911907077, 'timestamp': '2025-09-10 02:50:23.905397', 'step': 5318, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-09-10 02:50:23.966927', 'step': 5318, 'epoch': 3} +{'type': 'loss', 'content': 0.0018142081098631024, 'timestamp': '2025-09-10 02:50:23.977776', 'step': 5319, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:50:24.032002', 'step': 5319, 'epoch': 3} +{'type': 'loss', 'content': 0.00012109326053177938, 'timestamp': '2025-09-10 02:50:24.038117', 'step': 5320, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:50:24.091220', 'step': 5320, 'epoch': 3} +{'type': 'loss', 'content': 0.0007012794376350939, 'timestamp': '2025-09-10 02:50:24.099098', 'step': 5321, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:50:24.153444', 'step': 5321, 'epoch': 3} +{'type': 'loss', 'content': 1.7632090020924807e-05, 'timestamp': '2025-09-10 02:50:24.155546', 'step': 5322, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:50:24.210088', 'step': 5322, 'epoch': 3} +{'type': 'loss', 'content': 0.00014325756637845188, 'timestamp': '2025-09-10 02:50:24.212519', 'step': 5323, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:50:24.267232', 'step': 5323, 'epoch': 3} +{'type': 'loss', 'content': 0.011387856677174568, 'timestamp': '2025-09-10 02:50:24.273833', 'step': 5324, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:50:24.326952', 'step': 5324, 'epoch': 3} +{'type': 'loss', 'content': 0.007675164379179478, 'timestamp': '2025-09-10 02:50:24.329005', 'step': 5325, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:50:24.382665', 'step': 5325, 'epoch': 3} +{'type': 'loss', 'content': 0.004846174269914627, 'timestamp': '2025-09-10 02:50:24.388952', 'step': 5326, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:50:24.447956', 'step': 5326, 'epoch': 3} +{'type': 'loss', 'content': 0.0011303840437904, 'timestamp': '2025-09-10 02:50:24.458390', 'step': 5327, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:50:24.512542', 'step': 5327, 'epoch': 3} +{'type': 'loss', 'content': 0.0008211983949877322, 'timestamp': '2025-09-10 02:50:24.522929', 'step': 5328, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:50:24.575952', 'step': 5328, 'epoch': 3} +{'type': 'loss', 'content': 0.0006613574223592877, 'timestamp': '2025-09-10 02:50:24.578140', 'step': 5329, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:50:24.638834', 'step': 5329, 'epoch': 3} +{'type': 'loss', 'content': 0.007567502558231354, 'timestamp': '2025-09-10 02:50:24.649566', 'step': 5330, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:50:24.703403', 'step': 5330, 'epoch': 3} +{'type': 'loss', 'content': 0.003144640475511551, 'timestamp': '2025-09-10 02:50:24.705722', 'step': 5331, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:50:24.759139', 'step': 5331, 'epoch': 3} +{'type': 'loss', 'content': 1.945012809301261e-05, 'timestamp': '2025-09-10 02:50:24.765408', 'step': 5332, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:50:24.819605', 'step': 5332, 'epoch': 3} +{'type': 'loss', 'content': 0.016977153718471527, 'timestamp': '2025-09-10 02:50:24.823050', 'step': 5333, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:50:24.876603', 'step': 5333, 'epoch': 3} +{'type': 'loss', 'content': 4.153677218710072e-05, 'timestamp': '2025-09-10 02:50:24.883260', 'step': 5334, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:50:24.937150', 'step': 5334, 'epoch': 3} +{'type': 'loss', 'content': 0.0016523647354915738, 'timestamp': '2025-09-10 02:50:24.945241', 'step': 5335, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:50:24.998827', 'step': 5335, 'epoch': 3} +{'type': 'loss', 'content': 5.352885273168795e-05, 'timestamp': '2025-09-10 02:50:25.005504', 'step': 5336, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:50:25.058743', 'step': 5336, 'epoch': 3} +{'type': 'loss', 'content': 0.0002271160774398595, 'timestamp': '2025-09-10 02:50:25.060946', 'step': 5337, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:25.113485', 'step': 5337, 'epoch': 3} +{'type': 'loss', 'content': 0.00048105441965162754, 'timestamp': '2025-09-10 02:50:25.115878', 'step': 5338, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:50:25.172785', 'step': 5338, 'epoch': 3} +{'type': 'loss', 'content': 0.05529012158513069, 'timestamp': '2025-09-10 02:50:25.175107', 'step': 5339, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:50:25.236075', 'step': 5339, 'epoch': 3} +{'type': 'loss', 'content': 0.00013532188313547522, 'timestamp': '2025-09-10 02:50:25.242109', 'step': 5340, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:50:25.297393', 'step': 5340, 'epoch': 3} +{'type': 'loss', 'content': 9.117966692429036e-05, 'timestamp': '2025-09-10 02:50:25.301050', 'step': 5341, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:50:25.354272', 'step': 5341, 'epoch': 3} +{'type': 'loss', 'content': 0.0017126834718510509, 'timestamp': '2025-09-10 02:50:25.360855', 'step': 5342, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:50:25.414290', 'step': 5342, 'epoch': 3} +{'type': 'loss', 'content': 0.0002101500576827675, 'timestamp': '2025-09-10 02:50:25.416539', 'step': 5343, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:50:25.469957', 'step': 5343, 'epoch': 3} +{'type': 'loss', 'content': 0.0005071446648798883, 'timestamp': '2025-09-10 02:50:25.480329', 'step': 5344, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:50:25.535297', 'step': 5344, 'epoch': 3} +{'type': 'loss', 'content': 5.599417272605933e-05, 'timestamp': '2025-09-10 02:50:25.545710', 'step': 5345, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:50:25.610795', 'step': 5345, 'epoch': 3} +{'type': 'loss', 'content': 0.00012619995686691254, 'timestamp': '2025-09-10 02:50:25.614474', 'step': 5346, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:50:25.672047', 'step': 5346, 'epoch': 3} +{'type': 'loss', 'content': 0.00019272010831627995, 'timestamp': '2025-09-10 02:50:25.674466', 'step': 5347, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:50:25.734136', 'step': 5347, 'epoch': 3} +{'type': 'loss', 'content': 0.006375996861606836, 'timestamp': '2025-09-10 02:50:25.740213', 'step': 5348, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:50:25.792885', 'step': 5348, 'epoch': 3} +{'type': 'loss', 'content': 0.03304572030901909, 'timestamp': '2025-09-10 02:50:25.795176', 'step': 5349, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:50:25.849671', 'step': 5349, 'epoch': 3} +{'type': 'loss', 'content': 0.0018502881284803152, 'timestamp': '2025-09-10 02:50:25.859279', 'step': 5350, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:50:25.915180', 'step': 5350, 'epoch': 3} +{'type': 'loss', 'content': 0.0003769530158024281, 'timestamp': '2025-09-10 02:50:25.917415', 'step': 5351, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:50:25.980407', 'step': 5351, 'epoch': 3} +{'type': 'loss', 'content': 0.02025861106812954, 'timestamp': '2025-09-10 02:50:25.986598', 'step': 5352, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:50:26.039549', 'step': 5352, 'epoch': 3} +{'type': 'loss', 'content': 0.0017843242967501283, 'timestamp': '2025-09-10 02:50:26.044597', 'step': 5353, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:50:26.100910', 'step': 5353, 'epoch': 3} +{'type': 'loss', 'content': 0.0005629266379401088, 'timestamp': '2025-09-10 02:50:26.110696', 'step': 5354, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:50:26.165991', 'step': 5354, 'epoch': 3} +{'type': 'loss', 'content': 0.0008372010779567063, 'timestamp': '2025-09-10 02:50:26.175752', 'step': 5355, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:50:26.229762', 'step': 5355, 'epoch': 3} +{'type': 'loss', 'content': 0.001248279819265008, 'timestamp': '2025-09-10 02:50:26.236052', 'step': 5356, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:50:26.301898', 'step': 5356, 'epoch': 3} +{'type': 'loss', 'content': 0.006882089655846357, 'timestamp': '2025-09-10 02:50:26.304196', 'step': 5357, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:50:26.358507', 'step': 5357, 'epoch': 3} +{'type': 'loss', 'content': 0.0024386330042034388, 'timestamp': '2025-09-10 02:50:26.367998', 'step': 5358, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:50:26.423212', 'step': 5358, 'epoch': 3} +{'type': 'loss', 'content': 0.008268129080533981, 'timestamp': '2025-09-10 02:50:26.427502', 'step': 5359, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:50:26.482615', 'step': 5359, 'epoch': 3} +{'type': 'loss', 'content': 0.0021984220948070288, 'timestamp': '2025-09-10 02:50:26.488739', 'step': 5360, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:50:26.542010', 'step': 5360, 'epoch': 3} +{'type': 'loss', 'content': 6.90206652507186e-05, 'timestamp': '2025-09-10 02:50:26.544139', 'step': 5361, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:50:26.598008', 'step': 5361, 'epoch': 3} +{'type': 'loss', 'content': 0.0016047522658482194, 'timestamp': '2025-09-10 02:50:26.600915', 'step': 5362, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:50:26.655044', 'step': 5362, 'epoch': 3} +{'type': 'loss', 'content': 0.00024346180725842714, 'timestamp': '2025-09-10 02:50:26.657452', 'step': 5363, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-09-10 02:50:26.729624', 'step': 5363, 'epoch': 3} +{'type': 'loss', 'content': 0.00031420416780747473, 'timestamp': '2025-09-10 02:50:26.742638', 'step': 5364, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:50:26.800140', 'step': 5364, 'epoch': 3} +{'type': 'loss', 'content': 7.489165727747604e-05, 'timestamp': '2025-09-10 02:50:26.802397', 'step': 5365, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:50:26.855891', 'step': 5365, 'epoch': 3} +{'type': 'loss', 'content': 0.0002581734152045101, 'timestamp': '2025-09-10 02:50:26.858872', 'step': 5366, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-09-10 02:50:26.920786', 'step': 5366, 'epoch': 3} +{'type': 'loss', 'content': 0.016176769509911537, 'timestamp': '2025-09-10 02:50:26.931690', 'step': 5367, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:50:26.991576', 'step': 5367, 'epoch': 3} +{'type': 'loss', 'content': 0.026264488697052002, 'timestamp': '2025-09-10 02:50:27.001202', 'step': 5368, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:50:27.055888', 'step': 5368, 'epoch': 3} +{'type': 'loss', 'content': 0.00048563111340627074, 'timestamp': '2025-09-10 02:50:27.061962', 'step': 5369, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:50:27.115774', 'step': 5369, 'epoch': 3} +{'type': 'loss', 'content': 0.0005964773008599877, 'timestamp': '2025-09-10 02:50:27.125380', 'step': 5370, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:50:27.180216', 'step': 5370, 'epoch': 3} +{'type': 'loss', 'content': 0.005746359471231699, 'timestamp': '2025-09-10 02:50:27.190027', 'step': 5371, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:50:27.243450', 'step': 5371, 'epoch': 3} +{'type': 'loss', 'content': 0.0014409332070499659, 'timestamp': '2025-09-10 02:50:27.249395', 'step': 5372, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:50:27.302321', 'step': 5372, 'epoch': 3} +{'type': 'loss', 'content': 0.011967864818871021, 'timestamp': '2025-09-10 02:50:27.304705', 'step': 5373, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:50:27.357519', 'step': 5373, 'epoch': 3} +{'type': 'loss', 'content': 0.001208591042086482, 'timestamp': '2025-09-10 02:50:27.360711', 'step': 5374, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:50:27.414314', 'step': 5374, 'epoch': 3} +{'type': 'loss', 'content': 0.0003253155737183988, 'timestamp': '2025-09-10 02:50:27.416771', 'step': 5375, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:50:27.470978', 'step': 5375, 'epoch': 3} +{'type': 'loss', 'content': 0.001208487548865378, 'timestamp': '2025-09-10 02:50:27.476994', 'step': 5376, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:50:27.529330', 'step': 5376, 'epoch': 3} +{'type': 'loss', 'content': 6.788415339542553e-05, 'timestamp': '2025-09-10 02:50:27.532362', 'step': 5377, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:50:27.586534', 'step': 5377, 'epoch': 3} +{'type': 'loss', 'content': 0.01623762585222721, 'timestamp': '2025-09-10 02:50:27.596176', 'step': 5378, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:50:27.649286', 'step': 5378, 'epoch': 3} +{'type': 'loss', 'content': 0.008726070635020733, 'timestamp': '2025-09-10 02:50:27.651366', 'step': 5379, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:50:27.705050', 'step': 5379, 'epoch': 3} +{'type': 'loss', 'content': 0.001972564961761236, 'timestamp': '2025-09-10 02:50:27.711195', 'step': 5380, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:50:27.765181', 'step': 5380, 'epoch': 3} +{'type': 'loss', 'content': 0.038134124130010605, 'timestamp': '2025-09-10 02:50:27.772039', 'step': 5381, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:50:27.826305', 'step': 5381, 'epoch': 3} +{'type': 'loss', 'content': 0.0006542717455886304, 'timestamp': '2025-09-10 02:50:27.828527', 'step': 5382, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:50:27.881842', 'step': 5382, 'epoch': 3} +{'type': 'loss', 'content': 0.002229032339528203, 'timestamp': '2025-09-10 02:50:27.883998', 'step': 5383, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:50:27.936618', 'step': 5383, 'epoch': 3} +{'type': 'loss', 'content': 0.010771410539746284, 'timestamp': '2025-09-10 02:50:27.942558', 'step': 5384, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:50:27.995188', 'step': 5384, 'epoch': 3} +{'type': 'loss', 'content': 0.0005568374181166291, 'timestamp': '2025-09-10 02:50:27.997557', 'step': 5385, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:50:28.050713', 'step': 5385, 'epoch': 3} +{'type': 'loss', 'content': 0.00015441581490449607, 'timestamp': '2025-09-10 02:50:28.057300', 'step': 5386, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:50:28.110932', 'step': 5386, 'epoch': 3} +{'type': 'loss', 'content': 0.0003305087157059461, 'timestamp': '2025-09-10 02:50:28.113476', 'step': 5387, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-09-10 02:50:28.183079', 'step': 5387, 'epoch': 3} +{'type': 'loss', 'content': 0.05367087200284004, 'timestamp': '2025-09-10 02:50:28.196542', 'step': 5388, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:50:28.251310', 'step': 5388, 'epoch': 3} +{'type': 'loss', 'content': 0.0030221480410546064, 'timestamp': '2025-09-10 02:50:28.255550', 'step': 5389, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:50:28.310438', 'step': 5389, 'epoch': 3} +{'type': 'loss', 'content': 0.0005331829888746142, 'timestamp': '2025-09-10 02:50:28.314137', 'step': 5390, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:50:28.370259', 'step': 5390, 'epoch': 3} +{'type': 'loss', 'content': 0.0002382699603913352, 'timestamp': '2025-09-10 02:50:28.372799', 'step': 5391, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:50:28.428084', 'step': 5391, 'epoch': 3} +{'type': 'loss', 'content': 0.014002284035086632, 'timestamp': '2025-09-10 02:50:28.434525', 'step': 5392, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:50:28.487980', 'step': 5392, 'epoch': 3} +{'type': 'loss', 'content': 0.0010845428332686424, 'timestamp': '2025-09-10 02:50:28.490410', 'step': 5393, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:50:28.551609', 'step': 5393, 'epoch': 3} +{'type': 'loss', 'content': 0.0007954374304972589, 'timestamp': '2025-09-10 02:50:28.562317', 'step': 5394, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:50:28.616194', 'step': 5394, 'epoch': 3} +{'type': 'loss', 'content': 0.0015510314842686057, 'timestamp': '2025-09-10 02:50:28.618493', 'step': 5395, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:50:28.671212', 'step': 5395, 'epoch': 3} +{'type': 'loss', 'content': 0.00014971595373935997, 'timestamp': '2025-09-10 02:50:28.677483', 'step': 5396, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:50:28.730231', 'step': 5396, 'epoch': 3} +{'type': 'loss', 'content': 0.0469910204410553, 'timestamp': '2025-09-10 02:50:28.733041', 'step': 5397, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:50:28.786597', 'step': 5397, 'epoch': 3} +{'type': 'loss', 'content': 0.002077273325994611, 'timestamp': '2025-09-10 02:50:28.788914', 'step': 5398, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-09-10 02:50:28.855425', 'step': 5398, 'epoch': 3} +{'type': 'loss', 'content': 0.038137685507535934, 'timestamp': '2025-09-10 02:50:28.867655', 'step': 5399, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:50:28.921675', 'step': 5399, 'epoch': 3} +{'type': 'loss', 'content': 0.0066796415485441685, 'timestamp': '2025-09-10 02:50:28.928130', 'step': 5400, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:50:28.981242', 'step': 5400, 'epoch': 3} +{'type': 'loss', 'content': 0.0020001684315502644, 'timestamp': '2025-09-10 02:50:28.983789', 'step': 5401, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:50:29.037351', 'step': 5401, 'epoch': 3} +{'type': 'loss', 'content': 0.0014558539260178804, 'timestamp': '2025-09-10 02:50:29.039932', 'step': 5402, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:50:29.093342', 'step': 5402, 'epoch': 3} +{'type': 'loss', 'content': 0.00030777842039242387, 'timestamp': '2025-09-10 02:50:29.095676', 'step': 5403, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:50:29.149367', 'step': 5403, 'epoch': 3} +{'type': 'loss', 'content': 0.00032638476113788784, 'timestamp': '2025-09-10 02:50:29.159768', 'step': 5404, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:50:29.212587', 'step': 5404, 'epoch': 3} +{'type': 'loss', 'content': 0.0007634016801603138, 'timestamp': '2025-09-10 02:50:29.217656', 'step': 5405, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:50:29.272916', 'step': 5405, 'epoch': 3} +{'type': 'loss', 'content': 0.0004233828221913427, 'timestamp': '2025-09-10 02:50:29.275273', 'step': 5406, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:50:29.328417', 'step': 5406, 'epoch': 3} +{'type': 'loss', 'content': 0.009472401812672615, 'timestamp': '2025-09-10 02:50:29.330824', 'step': 5407, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:50:29.384166', 'step': 5407, 'epoch': 3} +{'type': 'loss', 'content': 0.000296570680802688, 'timestamp': '2025-09-10 02:50:29.390358', 'step': 5408, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:50:29.443118', 'step': 5408, 'epoch': 3} +{'type': 'loss', 'content': 0.006132566370069981, 'timestamp': '2025-09-10 02:50:29.445331', 'step': 5409, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-09-10 02:50:29.514110', 'step': 5409, 'epoch': 3} +{'type': 'loss', 'content': 0.011745997704565525, 'timestamp': '2025-09-10 02:50:29.526786', 'step': 5410, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:50:29.580290', 'step': 5410, 'epoch': 3} +{'type': 'loss', 'content': 0.0013986477861180902, 'timestamp': '2025-09-10 02:50:29.588322', 'step': 5411, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:50:29.641902', 'step': 5411, 'epoch': 3} +{'type': 'loss', 'content': 0.0001512405724497512, 'timestamp': '2025-09-10 02:50:29.648052', 'step': 5412, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:50:29.700847', 'step': 5412, 'epoch': 3} +{'type': 'loss', 'content': 0.004838789813220501, 'timestamp': '2025-09-10 02:50:29.703177', 'step': 5413, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:50:29.757936', 'step': 5413, 'epoch': 3} +{'type': 'loss', 'content': 0.04057854786515236, 'timestamp': '2025-09-10 02:50:29.760175', 'step': 5414, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:50:29.815423', 'step': 5414, 'epoch': 3} +{'type': 'loss', 'content': 0.000567415845580399, 'timestamp': '2025-09-10 02:50:29.825171', 'step': 5415, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:50:29.880104', 'step': 5415, 'epoch': 3} +{'type': 'loss', 'content': 0.001277577830478549, 'timestamp': '2025-09-10 02:50:29.886853', 'step': 5416, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:50:29.941093', 'step': 5416, 'epoch': 3} +{'type': 'loss', 'content': 0.0011723192874342203, 'timestamp': '2025-09-10 02:50:29.948194', 'step': 5417, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:50:30.002953', 'step': 5417, 'epoch': 3} +{'type': 'loss', 'content': 0.003885416779667139, 'timestamp': '2025-09-10 02:50:30.005244', 'step': 5418, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:50:30.060791', 'step': 5418, 'epoch': 3} +{'type': 'loss', 'content': 0.004850219469517469, 'timestamp': '2025-09-10 02:50:30.070574', 'step': 5419, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:50:30.124242', 'step': 5419, 'epoch': 3} +{'type': 'loss', 'content': 0.011279440484941006, 'timestamp': '2025-09-10 02:50:30.130552', 'step': 5420, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:50:30.183499', 'step': 5420, 'epoch': 3} +{'type': 'loss', 'content': 0.0006632082513533533, 'timestamp': '2025-09-10 02:50:30.193244', 'step': 5421, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:50:30.251349', 'step': 5421, 'epoch': 3} +{'type': 'loss', 'content': 0.0063169486820697784, 'timestamp': '2025-09-10 02:50:30.261777', 'step': 5422, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:50:30.316319', 'step': 5422, 'epoch': 3} +{'type': 'loss', 'content': 0.007491298019886017, 'timestamp': '2025-09-10 02:50:30.318794', 'step': 5423, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:50:30.372290', 'step': 5423, 'epoch': 3} +{'type': 'loss', 'content': 0.0004483237862586975, 'timestamp': '2025-09-10 02:50:30.378707', 'step': 5424, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:50:30.435470', 'step': 5424, 'epoch': 3} +{'type': 'loss', 'content': 0.0031863541807979345, 'timestamp': '2025-09-10 02:50:30.446658', 'step': 5425, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:50:30.500266', 'step': 5425, 'epoch': 3} +{'type': 'loss', 'content': 0.005209577735513449, 'timestamp': '2025-09-10 02:50:30.502770', 'step': 5426, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:50:30.556912', 'step': 5426, 'epoch': 3} +{'type': 'loss', 'content': 0.0029170033521950245, 'timestamp': '2025-09-10 02:50:30.562624', 'step': 5427, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:50:30.616674', 'step': 5427, 'epoch': 3} +{'type': 'loss', 'content': 0.009546552784740925, 'timestamp': '2025-09-10 02:50:30.623172', 'step': 5428, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:50:30.676822', 'step': 5428, 'epoch': 3} +{'type': 'loss', 'content': 0.004046494606882334, 'timestamp': '2025-09-10 02:50:30.679182', 'step': 5429, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:50:30.736033', 'step': 5429, 'epoch': 3} +{'type': 'loss', 'content': 0.0021987634245306253, 'timestamp': '2025-09-10 02:50:30.740844', 'step': 5430, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:50:30.802367', 'step': 5430, 'epoch': 3} +{'type': 'loss', 'content': 0.00036865490255877376, 'timestamp': '2025-09-10 02:50:30.813044', 'step': 5431, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:50:30.868031', 'step': 5431, 'epoch': 3} +{'type': 'loss', 'content': 0.0032375380396842957, 'timestamp': '2025-09-10 02:50:30.875457', 'step': 5432, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-09-10 02:50:30.940582', 'step': 5432, 'epoch': 3} +{'type': 'loss', 'content': 0.003072206163778901, 'timestamp': '2025-09-10 02:50:30.953791', 'step': 5433, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:50:31.008577', 'step': 5433, 'epoch': 3} +{'type': 'loss', 'content': 0.000492691237013787, 'timestamp': '2025-09-10 02:50:31.016109', 'step': 5434, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:50:31.070751', 'step': 5434, 'epoch': 3} +{'type': 'loss', 'content': 0.008733063004910946, 'timestamp': '2025-09-10 02:50:31.073067', 'step': 5435, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:50:31.127241', 'step': 5435, 'epoch': 3} +{'type': 'loss', 'content': 0.006456434726715088, 'timestamp': '2025-09-10 02:50:31.135879', 'step': 5436, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:50:31.188625', 'step': 5436, 'epoch': 3} +{'type': 'loss', 'content': 0.003922745119780302, 'timestamp': '2025-09-10 02:50:31.190940', 'step': 5437, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:50:31.244929', 'step': 5437, 'epoch': 3} +{'type': 'loss', 'content': 0.002351520350202918, 'timestamp': '2025-09-10 02:50:31.246953', 'step': 5438, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:50:31.300179', 'step': 5438, 'epoch': 3} +{'type': 'loss', 'content': 0.0005290330736897886, 'timestamp': '2025-09-10 02:50:31.302558', 'step': 5439, 'epoch': 3} +{'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 736], 'batch_size': 8, 'flops': 14691612894976}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10859018244352}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 624], 'batch_size': 8, 'flops': 12455932682112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11497784019456}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8623338031488}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8623338031488}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10859018244352}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 656], 'batch_size': 8, 'flops': 13094698457216}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 560], 'batch_size': 8, 'flops': 11178401131904}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11497784019456}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 1168], 'batch_size': 8, 'flops': 23314950858880}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 640], 'batch_size': 8, 'flops': 12775315569664}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [6, 208], 'batch_size': 8, 'flops': 4151977605760}], 'timestamp': '2025-09-10 02:50:48.245868', 'step': 5439, 'epoch': 3} +{'type': 'pplx', 'content': 21247500.82676681, 'timestamp': '2025-09-10 02:50:48.249031', 'step': 5439, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:50:48.303235', 'step': 5439, 'epoch': 3} +{'type': 'loss', 'content': 0.003753113327547908, 'timestamp': '2025-09-10 02:50:48.309781', 'step': 5440, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:50:48.363142', 'step': 5440, 'epoch': 3} +{'type': 'loss', 'content': 0.000293015647912398, 'timestamp': '2025-09-10 02:50:48.365188', 'step': 5441, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:50:48.418445', 'step': 5441, 'epoch': 3} +{'type': 'loss', 'content': 0.001151990843936801, 'timestamp': '2025-09-10 02:50:48.420593', 'step': 5442, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-09-10 02:50:48.488338', 'step': 5442, 'epoch': 3} +{'type': 'loss', 'content': 0.0008370094001293182, 'timestamp': '2025-09-10 02:50:48.500912', 'step': 5443, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:50:48.554963', 'step': 5443, 'epoch': 3} +{'type': 'loss', 'content': 0.0033184869680553675, 'timestamp': '2025-09-10 02:50:48.560942', 'step': 5444, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:50:48.614925', 'step': 5444, 'epoch': 3} +{'type': 'loss', 'content': 0.0004803019401151687, 'timestamp': '2025-09-10 02:50:48.625399', 'step': 5445, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:50:48.678708', 'step': 5445, 'epoch': 3} +{'type': 'loss', 'content': 0.0025308416225016117, 'timestamp': '2025-09-10 02:50:48.681076', 'step': 5446, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:50:48.734997', 'step': 5446, 'epoch': 3} +{'type': 'loss', 'content': 0.000822969654109329, 'timestamp': '2025-09-10 02:50:48.742904', 'step': 5447, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:50:48.796634', 'step': 5447, 'epoch': 3} +{'type': 'loss', 'content': 0.0024315486662089825, 'timestamp': '2025-09-10 02:50:48.802760', 'step': 5448, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:50:48.855246', 'step': 5448, 'epoch': 3} +{'type': 'loss', 'content': 0.02165428176522255, 'timestamp': '2025-09-10 02:50:48.857213', 'step': 5449, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:50:48.915328', 'step': 5449, 'epoch': 3} +{'type': 'loss', 'content': 0.010248234495520592, 'timestamp': '2025-09-10 02:50:48.925796', 'step': 5450, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-09-10 02:50:48.992881', 'step': 5450, 'epoch': 3} +{'type': 'loss', 'content': 0.003237777855247259, 'timestamp': '2025-09-10 02:50:49.005136', 'step': 5451, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:50:49.058620', 'step': 5451, 'epoch': 3} +{'type': 'loss', 'content': 0.0008273234707303345, 'timestamp': '2025-09-10 02:50:49.064747', 'step': 5452, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:50:49.117470', 'step': 5452, 'epoch': 3} +{'type': 'loss', 'content': 0.0005221560131758451, 'timestamp': '2025-09-10 02:50:49.120397', 'step': 5453, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:50:49.173547', 'step': 5453, 'epoch': 3} +{'type': 'loss', 'content': 0.0015310164308175445, 'timestamp': '2025-09-10 02:50:49.176190', 'step': 5454, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:50:49.229888', 'step': 5454, 'epoch': 3} +{'type': 'loss', 'content': 0.003992181736975908, 'timestamp': '2025-09-10 02:50:49.232237', 'step': 5455, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:50:49.285370', 'step': 5455, 'epoch': 3} +{'type': 'loss', 'content': 0.00021070835646241903, 'timestamp': '2025-09-10 02:50:49.291169', 'step': 5456, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:50:49.344663', 'step': 5456, 'epoch': 3} +{'type': 'loss', 'content': 0.0002683685743249953, 'timestamp': '2025-09-10 02:50:49.347080', 'step': 5457, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:50:49.401186', 'step': 5457, 'epoch': 3} +{'type': 'loss', 'content': 0.0005706814699806273, 'timestamp': '2025-09-10 02:50:49.403578', 'step': 5458, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:50:49.456501', 'step': 5458, 'epoch': 3} +{'type': 'loss', 'content': 0.00032965565333142877, 'timestamp': '2025-09-10 02:50:49.458545', 'step': 5459, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:50:49.513396', 'step': 5459, 'epoch': 3} +{'type': 'loss', 'content': 0.001771595561876893, 'timestamp': '2025-09-10 02:50:49.524003', 'step': 5460, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:50:49.576234', 'step': 5460, 'epoch': 3} +{'type': 'loss', 'content': 0.002002180553972721, 'timestamp': '2025-09-10 02:50:49.578170', 'step': 5461, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:50:49.631207', 'step': 5461, 'epoch': 3} +{'type': 'loss', 'content': 0.024736350402235985, 'timestamp': '2025-09-10 02:50:49.637744', 'step': 5462, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:50:49.690738', 'step': 5462, 'epoch': 3} +{'type': 'loss', 'content': 0.006495007313787937, 'timestamp': '2025-09-10 02:50:49.692869', 'step': 5463, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:50:49.751037', 'step': 5463, 'epoch': 3} +{'type': 'loss', 'content': 0.00018918355635832995, 'timestamp': '2025-09-10 02:50:49.762263', 'step': 5464, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:50:49.815155', 'step': 5464, 'epoch': 3} +{'type': 'loss', 'content': 0.0016512200236320496, 'timestamp': '2025-09-10 02:50:49.817622', 'step': 5465, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:50:49.872124', 'step': 5465, 'epoch': 3} +{'type': 'loss', 'content': 0.001879508257843554, 'timestamp': '2025-09-10 02:50:49.874286', 'step': 5466, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:50:49.927744', 'step': 5466, 'epoch': 3} +{'type': 'loss', 'content': 0.005570161156356335, 'timestamp': '2025-09-10 02:50:49.935923', 'step': 5467, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:50:49.988952', 'step': 5467, 'epoch': 3} +{'type': 'loss', 'content': 0.0005711301346309483, 'timestamp': '2025-09-10 02:50:49.998293', 'step': 5468, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:50:50.050439', 'step': 5468, 'epoch': 3} +{'type': 'loss', 'content': 0.002147696213796735, 'timestamp': '2025-09-10 02:50:50.053431', 'step': 5469, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:50:50.106365', 'step': 5469, 'epoch': 3} +{'type': 'loss', 'content': 0.0021026621107012033, 'timestamp': '2025-09-10 02:50:50.108659', 'step': 5470, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:50:50.161654', 'step': 5470, 'epoch': 3} +{'type': 'loss', 'content': 0.0018775284988805652, 'timestamp': '2025-09-10 02:50:50.164146', 'step': 5471, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 480], 'flops': 9600058345344.0}, 'timestamp': '2025-09-10 02:50:50.238591', 'step': 5471, 'epoch': 3} +{'type': 'loss', 'content': 0.04646948724985123, 'timestamp': '2025-09-10 02:50:50.253049', 'step': 5472, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:50:50.306928', 'step': 5472, 'epoch': 3} +{'type': 'loss', 'content': 0.0019888910464942455, 'timestamp': '2025-09-10 02:50:50.317451', 'step': 5473, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:50:50.371566', 'step': 5473, 'epoch': 3} +{'type': 'loss', 'content': 0.00048026881995610893, 'timestamp': '2025-09-10 02:50:50.373889', 'step': 5474, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:50:50.426778', 'step': 5474, 'epoch': 3} +{'type': 'loss', 'content': 0.014864002354443073, 'timestamp': '2025-09-10 02:50:50.428872', 'step': 5475, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:50:50.483235', 'step': 5475, 'epoch': 3} +{'type': 'loss', 'content': 0.0006738911033608019, 'timestamp': '2025-09-10 02:50:50.493824', 'step': 5476, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-09-10 02:50:50.554215', 'step': 5476, 'epoch': 3} +{'type': 'loss', 'content': 0.004326379857957363, 'timestamp': '2025-09-10 02:50:50.566257', 'step': 5477, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:50:50.620155', 'step': 5477, 'epoch': 3} +{'type': 'loss', 'content': 0.0007383892661891878, 'timestamp': '2025-09-10 02:50:50.622521', 'step': 5478, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:50:50.676595', 'step': 5478, 'epoch': 3} +{'type': 'loss', 'content': 0.0009683158132247627, 'timestamp': '2025-09-10 02:50:50.679030', 'step': 5479, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:50:50.732190', 'step': 5479, 'epoch': 3} +{'type': 'loss', 'content': 0.0014861313393339515, 'timestamp': '2025-09-10 02:50:50.738135', 'step': 5480, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-09-10 02:50:50.803223', 'step': 5480, 'epoch': 3} +{'type': 'loss', 'content': 0.0002807900309562683, 'timestamp': '2025-09-10 02:50:50.816480', 'step': 5481, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:50:50.869477', 'step': 5481, 'epoch': 3} +{'type': 'loss', 'content': 0.00012591062113642693, 'timestamp': '2025-09-10 02:50:50.871604', 'step': 5482, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:50:50.924062', 'step': 5482, 'epoch': 3} +{'type': 'loss', 'content': 0.0022591904271394014, 'timestamp': '2025-09-10 02:50:50.926487', 'step': 5483, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:50:50.980231', 'step': 5483, 'epoch': 3} +{'type': 'loss', 'content': 0.0004140017263125628, 'timestamp': '2025-09-10 02:50:50.990644', 'step': 5484, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:50:51.043714', 'step': 5484, 'epoch': 3} +{'type': 'loss', 'content': 0.001005968195386231, 'timestamp': '2025-09-10 02:50:51.046109', 'step': 5485, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:50:51.099415', 'step': 5485, 'epoch': 3} +{'type': 'loss', 'content': 0.0004888789844699204, 'timestamp': '2025-09-10 02:50:51.101997', 'step': 5486, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:50:51.155772', 'step': 5486, 'epoch': 3} +{'type': 'loss', 'content': 0.0001547732827020809, 'timestamp': '2025-09-10 02:50:51.165324', 'step': 5487, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:50:51.219491', 'step': 5487, 'epoch': 3} +{'type': 'loss', 'content': 9.355550719192252e-05, 'timestamp': '2025-09-10 02:50:51.225943', 'step': 5488, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:50:51.278847', 'step': 5488, 'epoch': 3} +{'type': 'loss', 'content': 0.00025125700631178916, 'timestamp': '2025-09-10 02:50:51.281162', 'step': 5489, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:50:51.334444', 'step': 5489, 'epoch': 3} +{'type': 'loss', 'content': 0.0023470858577638865, 'timestamp': '2025-09-10 02:50:51.336770', 'step': 5490, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:50:51.390117', 'step': 5490, 'epoch': 3} +{'type': 'loss', 'content': 0.006363871973007917, 'timestamp': '2025-09-10 02:50:51.393182', 'step': 5491, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:50:51.447488', 'step': 5491, 'epoch': 3} +{'type': 'loss', 'content': 0.0032735734712332487, 'timestamp': '2025-09-10 02:50:51.456537', 'step': 5492, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:50:51.511171', 'step': 5492, 'epoch': 3} +{'type': 'loss', 'content': 0.002292569959536195, 'timestamp': '2025-09-10 02:50:51.513329', 'step': 5493, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:50:51.567198', 'step': 5493, 'epoch': 3} +{'type': 'loss', 'content': 0.014479009434580803, 'timestamp': '2025-09-10 02:50:51.569549', 'step': 5494, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:50:51.623138', 'step': 5494, 'epoch': 3} +{'type': 'loss', 'content': 0.0002753959270194173, 'timestamp': '2025-09-10 02:50:51.625757', 'step': 5495, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:50:51.690914', 'step': 5495, 'epoch': 3} +{'type': 'loss', 'content': 0.0003543792699929327, 'timestamp': '2025-09-10 02:50:51.702419', 'step': 5496, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:50:51.755201', 'step': 5496, 'epoch': 3} +{'type': 'loss', 'content': 9.240380313713104e-05, 'timestamp': '2025-09-10 02:50:51.757323', 'step': 5497, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-09-10 02:50:51.825216', 'step': 5497, 'epoch': 3} +{'type': 'loss', 'content': 5.237787991063669e-05, 'timestamp': '2025-09-10 02:50:51.837758', 'step': 5498, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:50:51.891465', 'step': 5498, 'epoch': 3} +{'type': 'loss', 'content': 9.766897710505873e-05, 'timestamp': '2025-09-10 02:50:51.901081', 'step': 5499, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-09-10 02:50:51.968010', 'step': 5499, 'epoch': 3} +{'type': 'loss', 'content': 0.002392894821241498, 'timestamp': '2025-09-10 02:50:51.981046', 'step': 5500, 'epoch': 3} +{'type': 'info', 'content': 'Checkpoint saved at step 5500', 'timestamp': '2025-09-10 02:50:52.559884', 'step': 5500, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:50:52.616981', 'step': 5500, 'epoch': 3} +{'type': 'loss', 'content': 0.007339601870626211, 'timestamp': '2025-09-10 02:50:52.619043', 'step': 5501, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-09-10 02:50:52.679935', 'step': 5501, 'epoch': 3} +{'type': 'loss', 'content': 0.0016605450073257089, 'timestamp': '2025-09-10 02:50:52.690772', 'step': 5502, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:50:52.744575', 'step': 5502, 'epoch': 3} +{'type': 'loss', 'content': 0.0005885217688046396, 'timestamp': '2025-09-10 02:50:52.747206', 'step': 5503, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:50:52.800863', 'step': 5503, 'epoch': 3} +{'type': 'loss', 'content': 0.0009954218985512853, 'timestamp': '2025-09-10 02:50:52.806791', 'step': 5504, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 528], 'flops': 10560064173120.0}, 'timestamp': '2025-09-10 02:50:52.886207', 'step': 5504, 'epoch': 3} +{'type': 'loss', 'content': 0.0006120841135270894, 'timestamp': '2025-09-10 02:50:52.902746', 'step': 5505, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:50:52.957496', 'step': 5505, 'epoch': 3} +{'type': 'loss', 'content': 0.0008259991300292313, 'timestamp': '2025-09-10 02:50:52.959834', 'step': 5506, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:50:53.014056', 'step': 5506, 'epoch': 3} +{'type': 'loss', 'content': 0.005346500314772129, 'timestamp': '2025-09-10 02:50:53.016444', 'step': 5507, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:50:53.070291', 'step': 5507, 'epoch': 3} +{'type': 'loss', 'content': 0.002331335563212633, 'timestamp': '2025-09-10 02:50:53.076467', 'step': 5508, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:50:53.128700', 'step': 5508, 'epoch': 3} +{'type': 'loss', 'content': 0.00048764521488919854, 'timestamp': '2025-09-10 02:50:53.131279', 'step': 5509, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:50:53.189240', 'step': 5509, 'epoch': 3} +{'type': 'loss', 'content': 0.0007262100698426366, 'timestamp': '2025-09-10 02:50:53.199660', 'step': 5510, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:50:53.260692', 'step': 5510, 'epoch': 3} +{'type': 'loss', 'content': 0.001912870560772717, 'timestamp': '2025-09-10 02:50:53.271331', 'step': 5511, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:50:53.327452', 'step': 5511, 'epoch': 3} +{'type': 'loss', 'content': 0.00015238898049574345, 'timestamp': '2025-09-10 02:50:53.334003', 'step': 5512, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:50:53.387770', 'step': 5512, 'epoch': 3} +{'type': 'loss', 'content': 0.00010233638749923557, 'timestamp': '2025-09-10 02:50:53.389897', 'step': 5513, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:50:53.443502', 'step': 5513, 'epoch': 3} +{'type': 'loss', 'content': 0.0008512347121722996, 'timestamp': '2025-09-10 02:50:53.445808', 'step': 5514, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:50:53.498636', 'step': 5514, 'epoch': 3} +{'type': 'loss', 'content': 0.00026342945056967437, 'timestamp': '2025-09-10 02:50:53.501073', 'step': 5515, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:50:53.555496', 'step': 5515, 'epoch': 3} +{'type': 'loss', 'content': 0.00027247951948083937, 'timestamp': '2025-09-10 02:50:53.566010', 'step': 5516, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:50:53.619010', 'step': 5516, 'epoch': 3} +{'type': 'loss', 'content': 3.927114812540822e-05, 'timestamp': '2025-09-10 02:50:53.621035', 'step': 5517, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:50:53.683017', 'step': 5517, 'epoch': 3} +{'type': 'loss', 'content': 0.0011841331142932177, 'timestamp': '2025-09-10 02:50:53.693749', 'step': 5518, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:50:53.748139', 'step': 5518, 'epoch': 3} +{'type': 'loss', 'content': 0.004701241850852966, 'timestamp': '2025-09-10 02:50:53.750370', 'step': 5519, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:50:53.804539', 'step': 5519, 'epoch': 3} +{'type': 'loss', 'content': 0.0003399949346203357, 'timestamp': '2025-09-10 02:50:53.812880', 'step': 5520, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:50:53.867386', 'step': 5520, 'epoch': 3} +{'type': 'loss', 'content': 6.880938599351794e-05, 'timestamp': '2025-09-10 02:50:53.869567', 'step': 5521, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:50:53.924339', 'step': 5521, 'epoch': 3} +{'type': 'loss', 'content': 6.328742892947048e-05, 'timestamp': '2025-09-10 02:50:53.926792', 'step': 5522, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:50:53.979775', 'step': 5522, 'epoch': 3} +{'type': 'loss', 'content': 7.938553608255461e-05, 'timestamp': '2025-09-10 02:50:53.982100', 'step': 5523, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:50:54.035293', 'step': 5523, 'epoch': 3} +{'type': 'loss', 'content': 0.00015138849266804755, 'timestamp': '2025-09-10 02:50:54.041138', 'step': 5524, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:50:54.093972', 'step': 5524, 'epoch': 3} +{'type': 'loss', 'content': 0.00020000306540168822, 'timestamp': '2025-09-10 02:50:54.096498', 'step': 5525, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:50:54.157312', 'step': 5525, 'epoch': 3} +{'type': 'loss', 'content': 0.00018442697182763368, 'timestamp': '2025-09-10 02:50:54.168043', 'step': 5526, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:50:54.221881', 'step': 5526, 'epoch': 3} +{'type': 'loss', 'content': 0.0007379500311799347, 'timestamp': '2025-09-10 02:50:54.224265', 'step': 5527, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:50:54.277993', 'step': 5527, 'epoch': 3} +{'type': 'loss', 'content': 9.845731983659789e-05, 'timestamp': '2025-09-10 02:50:54.284071', 'step': 5528, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:50:54.336773', 'step': 5528, 'epoch': 3} +{'type': 'loss', 'content': 0.0001918896014103666, 'timestamp': '2025-09-10 02:50:54.343235', 'step': 5529, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:50:54.396334', 'step': 5529, 'epoch': 3} +{'type': 'loss', 'content': 0.0002484775031916797, 'timestamp': '2025-09-10 02:50:54.402905', 'step': 5530, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:50:54.467149', 'step': 5530, 'epoch': 3} +{'type': 'loss', 'content': 0.0002841740206349641, 'timestamp': '2025-09-10 02:50:54.477850', 'step': 5531, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:50:54.536160', 'step': 5531, 'epoch': 3} +{'type': 'loss', 'content': 0.00030048511689528823, 'timestamp': '2025-09-10 02:50:54.547366', 'step': 5532, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:50:54.602708', 'step': 5532, 'epoch': 3} +{'type': 'loss', 'content': 0.01263501774519682, 'timestamp': '2025-09-10 02:50:54.608893', 'step': 5533, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:50:54.662262', 'step': 5533, 'epoch': 3} +{'type': 'loss', 'content': 0.000945459702052176, 'timestamp': '2025-09-10 02:50:54.665870', 'step': 5534, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:50:54.719149', 'step': 5534, 'epoch': 3} +{'type': 'loss', 'content': 0.00015531423559878021, 'timestamp': '2025-09-10 02:50:54.721271', 'step': 5535, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:50:54.774373', 'step': 5535, 'epoch': 3} +{'type': 'loss', 'content': 0.013516255654394627, 'timestamp': '2025-09-10 02:50:54.782980', 'step': 5536, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:50:54.836966', 'step': 5536, 'epoch': 3} +{'type': 'loss', 'content': 0.0001032380314427428, 'timestamp': '2025-09-10 02:50:54.838959', 'step': 5537, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:50:54.891814', 'step': 5537, 'epoch': 3} +{'type': 'loss', 'content': 0.00046399797429330647, 'timestamp': '2025-09-10 02:50:54.893949', 'step': 5538, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:50:54.946779', 'step': 5538, 'epoch': 3} +{'type': 'loss', 'content': 0.0003333929053042084, 'timestamp': '2025-09-10 02:50:54.953380', 'step': 5539, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:50:55.006195', 'step': 5539, 'epoch': 3} +{'type': 'loss', 'content': 0.011815697886049747, 'timestamp': '2025-09-10 02:50:55.012225', 'step': 5540, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:50:55.066239', 'step': 5540, 'epoch': 3} +{'type': 'loss', 'content': 8.934136712923646e-05, 'timestamp': '2025-09-10 02:50:55.068415', 'step': 5541, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:50:55.121428', 'step': 5541, 'epoch': 3} +{'type': 'loss', 'content': 7.425570220220834e-05, 'timestamp': '2025-09-10 02:50:55.123796', 'step': 5542, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:50:55.177782', 'step': 5542, 'epoch': 3} +{'type': 'loss', 'content': 0.00948548037558794, 'timestamp': '2025-09-10 02:50:55.187429', 'step': 5543, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:50:55.242313', 'step': 5543, 'epoch': 3} +{'type': 'loss', 'content': 0.0003287082363385707, 'timestamp': '2025-09-10 02:50:55.248827', 'step': 5544, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:50:55.302270', 'step': 5544, 'epoch': 3} +{'type': 'loss', 'content': 0.03937207907438278, 'timestamp': '2025-09-10 02:50:55.305111', 'step': 5545, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:50:55.360480', 'step': 5545, 'epoch': 3} +{'type': 'loss', 'content': 0.0023267955984920263, 'timestamp': '2025-09-10 02:50:55.370273', 'step': 5546, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:50:55.424337', 'step': 5546, 'epoch': 3} +{'type': 'loss', 'content': 0.0011005288688465953, 'timestamp': '2025-09-10 02:50:55.430311', 'step': 5547, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:50:55.485859', 'step': 5547, 'epoch': 3} +{'type': 'loss', 'content': 0.03789230063557625, 'timestamp': '2025-09-10 02:50:55.491761', 'step': 5548, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:50:55.544789', 'step': 5548, 'epoch': 3} +{'type': 'loss', 'content': 0.0007854384020902216, 'timestamp': '2025-09-10 02:50:55.546910', 'step': 5549, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:50:55.600424', 'step': 5549, 'epoch': 3} +{'type': 'loss', 'content': 8.665223867865279e-05, 'timestamp': '2025-09-10 02:50:55.602513', 'step': 5550, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:50:55.656182', 'step': 5550, 'epoch': 3} +{'type': 'loss', 'content': 0.001957099651917815, 'timestamp': '2025-09-10 02:50:55.665793', 'step': 5551, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-09-10 02:50:55.733816', 'step': 5551, 'epoch': 3} +{'type': 'loss', 'content': 0.00036005611764267087, 'timestamp': '2025-09-10 02:50:55.747262', 'step': 5552, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:50:55.800075', 'step': 5552, 'epoch': 3} +{'type': 'loss', 'content': 2.9516892027459107e-05, 'timestamp': '2025-09-10 02:50:55.802198', 'step': 5553, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:50:55.855236', 'step': 5553, 'epoch': 3} +{'type': 'loss', 'content': 0.003146077273413539, 'timestamp': '2025-09-10 02:50:55.857474', 'step': 5554, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 480], 'flops': 9600058345344.0}, 'timestamp': '2025-09-10 02:50:55.931067', 'step': 5554, 'epoch': 3} +{'type': 'loss', 'content': 0.006576284300535917, 'timestamp': '2025-09-10 02:50:55.944782', 'step': 5555, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:50:55.997950', 'step': 5555, 'epoch': 3} +{'type': 'loss', 'content': 0.0001338785223197192, 'timestamp': '2025-09-10 02:50:56.003689', 'step': 5556, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:50:56.056429', 'step': 5556, 'epoch': 3} +{'type': 'loss', 'content': 0.0001883771037682891, 'timestamp': '2025-09-10 02:50:56.058476', 'step': 5557, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:50:56.111591', 'step': 5557, 'epoch': 3} +{'type': 'loss', 'content': 0.0005519331898540258, 'timestamp': '2025-09-10 02:50:56.114595', 'step': 5558, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-09-10 02:50:56.183292', 'step': 5558, 'epoch': 3} +{'type': 'loss', 'content': 8.991384675027803e-05, 'timestamp': '2025-09-10 02:50:56.196048', 'step': 5559, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:50:56.257187', 'step': 5559, 'epoch': 3} +{'type': 'loss', 'content': 9.897096606437117e-05, 'timestamp': '2025-09-10 02:50:56.268709', 'step': 5560, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-09-10 02:50:56.328946', 'step': 5560, 'epoch': 3} +{'type': 'loss', 'content': 0.00011606734187807888, 'timestamp': '2025-09-10 02:50:56.340926', 'step': 5561, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:50:56.396302', 'step': 5561, 'epoch': 3} +{'type': 'loss', 'content': 0.0009803232969716191, 'timestamp': '2025-09-10 02:50:56.398346', 'step': 5562, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:50:56.452403', 'step': 5562, 'epoch': 3} +{'type': 'loss', 'content': 0.0026255848351866007, 'timestamp': '2025-09-10 02:50:56.454449', 'step': 5563, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:50:56.508528', 'step': 5563, 'epoch': 3} +{'type': 'loss', 'content': 0.0004570243472699076, 'timestamp': '2025-09-10 02:50:56.516514', 'step': 5564, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:50:56.577886', 'step': 5564, 'epoch': 3} +{'type': 'loss', 'content': 5.214964039623737e-05, 'timestamp': '2025-09-10 02:50:56.580293', 'step': 5565, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:50:56.637961', 'step': 5565, 'epoch': 3} +{'type': 'loss', 'content': 0.00032835971796885133, 'timestamp': '2025-09-10 02:50:56.644434', 'step': 5566, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 480], 'flops': 9600058345344.0}, 'timestamp': '2025-09-10 02:50:56.722700', 'step': 5566, 'epoch': 3} +{'type': 'loss', 'content': 8.543814328731969e-05, 'timestamp': '2025-09-10 02:50:56.736429', 'step': 5567, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-09-10 02:50:56.804505', 'step': 5567, 'epoch': 3} +{'type': 'loss', 'content': 0.0003689560398925096, 'timestamp': '2025-09-10 02:50:56.817845', 'step': 5568, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:50:56.871219', 'step': 5568, 'epoch': 3} +{'type': 'loss', 'content': 0.00021741418458987027, 'timestamp': '2025-09-10 02:50:56.873500', 'step': 5569, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:50:56.927113', 'step': 5569, 'epoch': 3} +{'type': 'loss', 'content': 0.0002533073420636356, 'timestamp': '2025-09-10 02:50:56.933540', 'step': 5570, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:50:56.987355', 'step': 5570, 'epoch': 3} +{'type': 'loss', 'content': 0.0006551762926392257, 'timestamp': '2025-09-10 02:50:56.996965', 'step': 5571, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:50:57.050204', 'step': 5571, 'epoch': 3} +{'type': 'loss', 'content': 0.0001837767194956541, 'timestamp': '2025-09-10 02:50:57.056298', 'step': 5572, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:50:57.109527', 'step': 5572, 'epoch': 3} +{'type': 'loss', 'content': 9.328331361757591e-05, 'timestamp': '2025-09-10 02:50:57.111612', 'step': 5573, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-09-10 02:50:57.174219', 'step': 5573, 'epoch': 3} +{'type': 'loss', 'content': 0.0003454013785813004, 'timestamp': '2025-09-10 02:50:57.185326', 'step': 5574, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:50:57.245285', 'step': 5574, 'epoch': 3} +{'type': 'loss', 'content': 0.0008125525782816112, 'timestamp': '2025-09-10 02:50:57.255727', 'step': 5575, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:50:57.310342', 'step': 5575, 'epoch': 3} +{'type': 'loss', 'content': 9.21983300941065e-05, 'timestamp': '2025-09-10 02:50:57.316376', 'step': 5576, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:50:57.369435', 'step': 5576, 'epoch': 3} +{'type': 'loss', 'content': 0.0045641278848052025, 'timestamp': '2025-09-10 02:50:57.371364', 'step': 5577, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:50:57.424501', 'step': 5577, 'epoch': 3} +{'type': 'loss', 'content': 0.00022250918846111745, 'timestamp': '2025-09-10 02:50:57.426829', 'step': 5578, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:50:57.479765', 'step': 5578, 'epoch': 3} +{'type': 'loss', 'content': 0.00028944681980647147, 'timestamp': '2025-09-10 02:50:57.487803', 'step': 5579, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:50:57.541304', 'step': 5579, 'epoch': 3} +{'type': 'loss', 'content': 0.016156604513525963, 'timestamp': '2025-09-10 02:50:57.547080', 'step': 5580, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:50:57.599502', 'step': 5580, 'epoch': 3} +{'type': 'loss', 'content': 0.009447270072996616, 'timestamp': '2025-09-10 02:50:57.602387', 'step': 5581, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:50:57.655437', 'step': 5581, 'epoch': 3} +{'type': 'loss', 'content': 0.0012968778610229492, 'timestamp': '2025-09-10 02:50:57.657657', 'step': 5582, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:50:57.710990', 'step': 5582, 'epoch': 3} +{'type': 'loss', 'content': 0.0002128742344211787, 'timestamp': '2025-09-10 02:50:57.712829', 'step': 5583, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:50:57.765464', 'step': 5583, 'epoch': 3} +{'type': 'loss', 'content': 0.0008937938255257905, 'timestamp': '2025-09-10 02:50:57.771036', 'step': 5584, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:50:57.823641', 'step': 5584, 'epoch': 3} +{'type': 'loss', 'content': 0.001554557355120778, 'timestamp': '2025-09-10 02:50:57.826579', 'step': 5585, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:50:57.879486', 'step': 5585, 'epoch': 3} +{'type': 'loss', 'content': 0.0005392265156842768, 'timestamp': '2025-09-10 02:50:57.881676', 'step': 5586, 'epoch': 3} +{'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 736], 'batch_size': 8, 'flops': 14691612894976}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10859018244352}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 624], 'batch_size': 8, 'flops': 12455932682112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11497784019456}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8623338031488}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8623338031488}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10859018244352}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 656], 'batch_size': 8, 'flops': 13094698457216}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 560], 'batch_size': 8, 'flops': 11178401131904}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11497784019456}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 1168], 'batch_size': 8, 'flops': 23314950858880}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 640], 'batch_size': 8, 'flops': 12775315569664}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [6, 208], 'batch_size': 8, 'flops': 4151977605760}], 'timestamp': '2025-09-10 02:51:15.287716', 'step': 5586, 'epoch': 3} +{'type': 'pplx', 'content': 21562904.896350686, 'timestamp': '2025-09-10 02:51:15.291034', 'step': 5586, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:51:15.345302', 'step': 5586, 'epoch': 3} +{'type': 'loss', 'content': 0.0002189187944168225, 'timestamp': '2025-09-10 02:51:15.347597', 'step': 5587, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:51:15.405770', 'step': 5587, 'epoch': 3} +{'type': 'loss', 'content': 0.005762639455497265, 'timestamp': '2025-09-10 02:51:15.417010', 'step': 5588, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:51:15.471022', 'step': 5588, 'epoch': 3} +{'type': 'loss', 'content': 0.0034079679753631353, 'timestamp': '2025-09-10 02:51:15.473267', 'step': 5589, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:51:15.527110', 'step': 5589, 'epoch': 3} +{'type': 'loss', 'content': 0.00013979661162011325, 'timestamp': '2025-09-10 02:51:15.529145', 'step': 5590, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:51:15.582826', 'step': 5590, 'epoch': 3} +{'type': 'loss', 'content': 0.0018825005972757936, 'timestamp': '2025-09-10 02:51:15.584956', 'step': 5591, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:51:15.639262', 'step': 5591, 'epoch': 3} +{'type': 'loss', 'content': 0.0014448349829763174, 'timestamp': '2025-09-10 02:51:15.647050', 'step': 5592, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:51:15.701998', 'step': 5592, 'epoch': 3} +{'type': 'loss', 'content': 7.661448034923524e-05, 'timestamp': '2025-09-10 02:51:15.708441', 'step': 5593, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:51:15.765633', 'step': 5593, 'epoch': 3} +{'type': 'loss', 'content': 0.00022054556757211685, 'timestamp': '2025-09-10 02:51:15.767690', 'step': 5594, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:51:15.822329', 'step': 5594, 'epoch': 3} +{'type': 'loss', 'content': 0.00024557029246352613, 'timestamp': '2025-09-10 02:51:15.832187', 'step': 5595, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:51:15.886457', 'step': 5595, 'epoch': 3} +{'type': 'loss', 'content': 0.0070946416817605495, 'timestamp': '2025-09-10 02:51:15.895548', 'step': 5596, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:51:15.949269', 'step': 5596, 'epoch': 3} +{'type': 'loss', 'content': 0.00019243491988163441, 'timestamp': '2025-09-10 02:51:15.954918', 'step': 5597, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:51:16.011547', 'step': 5597, 'epoch': 3} +{'type': 'loss', 'content': 0.00012800561671610922, 'timestamp': '2025-09-10 02:51:16.013954', 'step': 5598, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:51:16.067055', 'step': 5598, 'epoch': 3} +{'type': 'loss', 'content': 0.0013608066365122795, 'timestamp': '2025-09-10 02:51:16.073457', 'step': 5599, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:51:16.127115', 'step': 5599, 'epoch': 3} +{'type': 'loss', 'content': 4.2568775825202465e-05, 'timestamp': '2025-09-10 02:51:16.133121', 'step': 5600, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 784], 'flops': 15680095254592.0}, 'timestamp': '2025-09-10 02:51:16.244735', 'step': 5600, 'epoch': 3} +{'type': 'loss', 'content': 0.00547827547416091, 'timestamp': '2025-09-10 02:51:16.268995', 'step': 5601, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:51:16.324313', 'step': 5601, 'epoch': 3} +{'type': 'loss', 'content': 0.0016723232110962272, 'timestamp': '2025-09-10 02:51:16.334116', 'step': 5602, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:51:16.387570', 'step': 5602, 'epoch': 3} +{'type': 'loss', 'content': 0.0008668908849358559, 'timestamp': '2025-09-10 02:51:16.389681', 'step': 5603, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:51:16.442579', 'step': 5603, 'epoch': 3} +{'type': 'loss', 'content': 6.555887375725433e-05, 'timestamp': '2025-09-10 02:51:16.451409', 'step': 5604, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:51:16.503988', 'step': 5604, 'epoch': 3} +{'type': 'loss', 'content': 0.0004841067420784384, 'timestamp': '2025-09-10 02:51:16.510300', 'step': 5605, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-09-10 02:51:16.578976', 'step': 5605, 'epoch': 3} +{'type': 'loss', 'content': 0.0002817298227455467, 'timestamp': '2025-09-10 02:51:16.591664', 'step': 5606, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:51:16.644881', 'step': 5606, 'epoch': 3} +{'type': 'loss', 'content': 0.0006291944882832468, 'timestamp': '2025-09-10 02:51:16.647037', 'step': 5607, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:51:16.700166', 'step': 5607, 'epoch': 3} +{'type': 'loss', 'content': 0.00028438554727472365, 'timestamp': '2025-09-10 02:51:16.706223', 'step': 5608, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:51:16.758884', 'step': 5608, 'epoch': 3} +{'type': 'loss', 'content': 0.0002940757549367845, 'timestamp': '2025-09-10 02:51:16.761571', 'step': 5609, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:51:16.815242', 'step': 5609, 'epoch': 3} +{'type': 'loss', 'content': 0.0002263123169541359, 'timestamp': '2025-09-10 02:51:16.817591', 'step': 5610, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:51:16.870650', 'step': 5610, 'epoch': 3} +{'type': 'loss', 'content': 0.0012129464885219932, 'timestamp': '2025-09-10 02:51:16.872860', 'step': 5611, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:51:16.925643', 'step': 5611, 'epoch': 3} +{'type': 'loss', 'content': 0.00016381089517381042, 'timestamp': '2025-09-10 02:51:16.931724', 'step': 5612, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:51:16.984213', 'step': 5612, 'epoch': 3} +{'type': 'loss', 'content': 0.00024286587722599506, 'timestamp': '2025-09-10 02:51:16.987184', 'step': 5613, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:51:17.040259', 'step': 5613, 'epoch': 3} +{'type': 'loss', 'content': 0.0007025203667581081, 'timestamp': '2025-09-10 02:51:17.043790', 'step': 5614, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:51:17.097547', 'step': 5614, 'epoch': 3} +{'type': 'loss', 'content': 4.814678322873078e-05, 'timestamp': '2025-09-10 02:51:17.099793', 'step': 5615, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:51:17.152529', 'step': 5615, 'epoch': 3} +{'type': 'loss', 'content': 0.0010319777065888047, 'timestamp': '2025-09-10 02:51:17.158470', 'step': 5616, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:51:17.211270', 'step': 5616, 'epoch': 3} +{'type': 'loss', 'content': 0.0005186051712371409, 'timestamp': '2025-09-10 02:51:17.213595', 'step': 5617, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-09-10 02:51:17.280256', 'step': 5617, 'epoch': 3} +{'type': 'loss', 'content': 0.00016246965969912708, 'timestamp': '2025-09-10 02:51:17.292476', 'step': 5618, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:51:17.346352', 'step': 5618, 'epoch': 3} +{'type': 'loss', 'content': 0.0008623627363704145, 'timestamp': '2025-09-10 02:51:17.352708', 'step': 5619, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:51:17.405630', 'step': 5619, 'epoch': 3} +{'type': 'loss', 'content': 0.019034624099731445, 'timestamp': '2025-09-10 02:51:17.411578', 'step': 5620, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 496], 'flops': 9920060287936.0}, 'timestamp': '2025-09-10 02:51:17.484073', 'step': 5620, 'epoch': 3} +{'type': 'loss', 'content': 0.00028661653050221503, 'timestamp': '2025-09-10 02:51:17.499310', 'step': 5621, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:51:17.559957', 'step': 5621, 'epoch': 3} +{'type': 'loss', 'content': 0.00014885196287650615, 'timestamp': '2025-09-10 02:51:17.570700', 'step': 5622, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:51:17.624566', 'step': 5622, 'epoch': 3} +{'type': 'loss', 'content': 0.0035353959538042545, 'timestamp': '2025-09-10 02:51:17.627043', 'step': 5623, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:51:17.680286', 'step': 5623, 'epoch': 3} +{'type': 'loss', 'content': 0.0007466920651495457, 'timestamp': '2025-09-10 02:51:17.686271', 'step': 5624, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:51:17.738735', 'step': 5624, 'epoch': 3} +{'type': 'loss', 'content': 5.3479107009479776e-05, 'timestamp': '2025-09-10 02:51:17.740976', 'step': 5625, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:51:17.793780', 'step': 5625, 'epoch': 3} +{'type': 'loss', 'content': 0.0002028747258009389, 'timestamp': '2025-09-10 02:51:17.796238', 'step': 5626, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:51:17.849926', 'step': 5626, 'epoch': 3} +{'type': 'loss', 'content': 0.00012403483560774475, 'timestamp': '2025-09-10 02:51:17.853349', 'step': 5627, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:51:17.909952', 'step': 5627, 'epoch': 3} +{'type': 'loss', 'content': 0.001594047644175589, 'timestamp': '2025-09-10 02:51:17.915992', 'step': 5628, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:51:17.969204', 'step': 5628, 'epoch': 3} +{'type': 'loss', 'content': 0.0006895575788803399, 'timestamp': '2025-09-10 02:51:17.971292', 'step': 5629, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:51:18.024178', 'step': 5629, 'epoch': 3} +{'type': 'loss', 'content': 0.00022008655651006848, 'timestamp': '2025-09-10 02:51:18.026564', 'step': 5630, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:51:18.079842', 'step': 5630, 'epoch': 3} +{'type': 'loss', 'content': 8.427882130490616e-05, 'timestamp': '2025-09-10 02:51:18.082072', 'step': 5631, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:51:18.135130', 'step': 5631, 'epoch': 3} +{'type': 'loss', 'content': 9.123433119384572e-05, 'timestamp': '2025-09-10 02:51:18.141377', 'step': 5632, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:51:18.194232', 'step': 5632, 'epoch': 3} +{'type': 'loss', 'content': 0.0001261413999600336, 'timestamp': '2025-09-10 02:51:18.197035', 'step': 5633, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:51:18.250716', 'step': 5633, 'epoch': 3} +{'type': 'loss', 'content': 0.00015730482118669897, 'timestamp': '2025-09-10 02:51:18.252944', 'step': 5634, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:51:18.307541', 'step': 5634, 'epoch': 3} +{'type': 'loss', 'content': 7.122440001694486e-05, 'timestamp': '2025-09-10 02:51:18.309581', 'step': 5635, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:51:18.364021', 'step': 5635, 'epoch': 3} +{'type': 'loss', 'content': 0.00010261707211611792, 'timestamp': '2025-09-10 02:51:18.374607', 'step': 5636, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:51:18.427138', 'step': 5636, 'epoch': 3} +{'type': 'loss', 'content': 0.00017502308764960617, 'timestamp': '2025-09-10 02:51:18.435321', 'step': 5637, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:51:18.488714', 'step': 5637, 'epoch': 3} +{'type': 'loss', 'content': 0.0001294859393965453, 'timestamp': '2025-09-10 02:51:18.490887', 'step': 5638, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:51:18.543941', 'step': 5638, 'epoch': 3} +{'type': 'loss', 'content': 0.0035855176392942667, 'timestamp': '2025-09-10 02:51:18.550343', 'step': 5639, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-09-10 02:51:18.611969', 'step': 5639, 'epoch': 3} +{'type': 'loss', 'content': 0.00014155091776046902, 'timestamp': '2025-09-10 02:51:18.623585', 'step': 5640, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:51:18.676350', 'step': 5640, 'epoch': 3} +{'type': 'loss', 'content': 0.0005143466405570507, 'timestamp': '2025-09-10 02:51:18.678780', 'step': 5641, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:51:18.731355', 'step': 5641, 'epoch': 3} +{'type': 'loss', 'content': 0.0018331704195588827, 'timestamp': '2025-09-10 02:51:18.734351', 'step': 5642, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:51:18.787806', 'step': 5642, 'epoch': 3} +{'type': 'loss', 'content': 7.844376523280516e-05, 'timestamp': '2025-09-10 02:51:18.794283', 'step': 5643, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:51:18.847211', 'step': 5643, 'epoch': 3} +{'type': 'loss', 'content': 0.0004243445582687855, 'timestamp': '2025-09-10 02:51:18.853206', 'step': 5644, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:51:18.905796', 'step': 5644, 'epoch': 3} +{'type': 'loss', 'content': 0.001895484165288508, 'timestamp': '2025-09-10 02:51:18.908663', 'step': 5645, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:51:18.961549', 'step': 5645, 'epoch': 3} +{'type': 'loss', 'content': 0.00258989492431283, 'timestamp': '2025-09-10 02:51:18.969592', 'step': 5646, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:51:19.024801', 'step': 5646, 'epoch': 3} +{'type': 'loss', 'content': 0.00010115856275660917, 'timestamp': '2025-09-10 02:51:19.026984', 'step': 5647, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:51:19.080966', 'step': 5647, 'epoch': 3} +{'type': 'loss', 'content': 0.0011619441211223602, 'timestamp': '2025-09-10 02:51:19.091332', 'step': 5648, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:51:19.148581', 'step': 5648, 'epoch': 3} +{'type': 'loss', 'content': 0.00019926049571949989, 'timestamp': '2025-09-10 02:51:19.150934', 'step': 5649, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:51:19.203687', 'step': 5649, 'epoch': 3} +{'type': 'loss', 'content': 0.0004339973093010485, 'timestamp': '2025-09-10 02:51:19.206012', 'step': 5650, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:51:19.264518', 'step': 5650, 'epoch': 3} +{'type': 'loss', 'content': 0.0011778261978179216, 'timestamp': '2025-09-10 02:51:19.272668', 'step': 5651, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:51:19.325213', 'step': 5651, 'epoch': 3} +{'type': 'loss', 'content': 0.00022192489996086806, 'timestamp': '2025-09-10 02:51:19.331121', 'step': 5652, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:51:19.387059', 'step': 5652, 'epoch': 3} +{'type': 'loss', 'content': 0.000944877858273685, 'timestamp': '2025-09-10 02:51:19.389247', 'step': 5653, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:51:19.442320', 'step': 5653, 'epoch': 3} +{'type': 'loss', 'content': 0.00040989063563756645, 'timestamp': '2025-09-10 02:51:19.444540', 'step': 5654, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-09-10 02:51:19.515338', 'step': 5654, 'epoch': 3} +{'type': 'loss', 'content': 0.00010838153684744611, 'timestamp': '2025-09-10 02:51:19.527923', 'step': 5655, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:51:19.583173', 'step': 5655, 'epoch': 3} +{'type': 'loss', 'content': 0.00010888419637922198, 'timestamp': '2025-09-10 02:51:19.589163', 'step': 5656, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:51:19.640982', 'step': 5656, 'epoch': 3} +{'type': 'loss', 'content': 0.0004886123933829367, 'timestamp': '2025-09-10 02:51:19.643992', 'step': 5657, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:51:19.697491', 'step': 5657, 'epoch': 3} +{'type': 'loss', 'content': 0.00011198705033166334, 'timestamp': '2025-09-10 02:51:19.699476', 'step': 5658, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:51:19.752281', 'step': 5658, 'epoch': 3} +{'type': 'loss', 'content': 0.004554561339318752, 'timestamp': '2025-09-10 02:51:19.757833', 'step': 5659, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:51:19.812724', 'step': 5659, 'epoch': 3} +{'type': 'loss', 'content': 0.0009690019069239497, 'timestamp': '2025-09-10 02:51:19.818566', 'step': 5660, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:51:19.871400', 'step': 5660, 'epoch': 3} +{'type': 'loss', 'content': 0.00010577777720754966, 'timestamp': '2025-09-10 02:51:19.873709', 'step': 5661, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:51:19.926233', 'step': 5661, 'epoch': 3} +{'type': 'loss', 'content': 0.0001331623352598399, 'timestamp': '2025-09-10 02:51:19.928457', 'step': 5662, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:51:19.981125', 'step': 5662, 'epoch': 3} +{'type': 'loss', 'content': 0.002772526117041707, 'timestamp': '2025-09-10 02:51:19.984086', 'step': 5663, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:51:20.036585', 'step': 5663, 'epoch': 3} +{'type': 'loss', 'content': 4.7308487410191447e-05, 'timestamp': '2025-09-10 02:51:20.042504', 'step': 5664, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:51:20.095374', 'step': 5664, 'epoch': 3} +{'type': 'loss', 'content': 0.0002683405764400959, 'timestamp': '2025-09-10 02:51:20.098250', 'step': 5665, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:51:20.151062', 'step': 5665, 'epoch': 3} +{'type': 'loss', 'content': 0.00014960307453293353, 'timestamp': '2025-09-10 02:51:20.159360', 'step': 5666, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:51:20.212715', 'step': 5666, 'epoch': 3} +{'type': 'loss', 'content': 0.002826147014275193, 'timestamp': '2025-09-10 02:51:20.215022', 'step': 5667, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:51:20.267819', 'step': 5667, 'epoch': 3} +{'type': 'loss', 'content': 0.0011734537547454238, 'timestamp': '2025-09-10 02:51:20.275136', 'step': 5668, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:51:20.331313', 'step': 5668, 'epoch': 3} +{'type': 'loss', 'content': 0.0076494039967656136, 'timestamp': '2025-09-10 02:51:20.342546', 'step': 5669, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:51:20.396084', 'step': 5669, 'epoch': 3} +{'type': 'loss', 'content': 0.00040603900561109185, 'timestamp': '2025-09-10 02:51:20.402490', 'step': 5670, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:51:20.456547', 'step': 5670, 'epoch': 3} +{'type': 'loss', 'content': 0.013736828230321407, 'timestamp': '2025-09-10 02:51:20.458953', 'step': 5671, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:51:20.512548', 'step': 5671, 'epoch': 3} +{'type': 'loss', 'content': 0.00391897838562727, 'timestamp': '2025-09-10 02:51:20.518520', 'step': 5672, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:51:20.571205', 'step': 5672, 'epoch': 3} +{'type': 'loss', 'content': 0.002297357888892293, 'timestamp': '2025-09-10 02:51:20.573246', 'step': 5673, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:51:20.626809', 'step': 5673, 'epoch': 3} +{'type': 'loss', 'content': 0.0001491195143898949, 'timestamp': '2025-09-10 02:51:20.629149', 'step': 5674, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:51:20.683650', 'step': 5674, 'epoch': 3} +{'type': 'loss', 'content': 0.00038279959699139, 'timestamp': '2025-09-10 02:51:20.685961', 'step': 5675, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-09-10 02:51:20.747644', 'step': 5675, 'epoch': 3} +{'type': 'loss', 'content': 7.82403294579126e-05, 'timestamp': '2025-09-10 02:51:20.759530', 'step': 5676, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:51:20.812556', 'step': 5676, 'epoch': 3} +{'type': 'loss', 'content': 0.07603979110717773, 'timestamp': '2025-09-10 02:51:20.822880', 'step': 5677, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-09-10 02:51:20.884225', 'step': 5677, 'epoch': 3} +{'type': 'loss', 'content': 0.00014555695815943182, 'timestamp': '2025-09-10 02:51:20.895166', 'step': 5678, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:51:20.948219', 'step': 5678, 'epoch': 3} +{'type': 'loss', 'content': 0.014404937624931335, 'timestamp': '2025-09-10 02:51:20.950395', 'step': 5679, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:51:21.003081', 'step': 5679, 'epoch': 3} +{'type': 'loss', 'content': 0.0005144585738889873, 'timestamp': '2025-09-10 02:51:21.008960', 'step': 5680, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:51:21.061290', 'step': 5680, 'epoch': 3} +{'type': 'loss', 'content': 2.894156386901159e-05, 'timestamp': '2025-09-10 02:51:21.063588', 'step': 5681, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:51:21.118217', 'step': 5681, 'epoch': 3} +{'type': 'loss', 'content': 3.2053729228209704e-05, 'timestamp': '2025-09-10 02:51:21.128017', 'step': 5682, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 576], 'flops': 11520070000896.0}, 'timestamp': '2025-09-10 02:51:21.211670', 'step': 5682, 'epoch': 3} +{'type': 'loss', 'content': 0.00046526288497261703, 'timestamp': '2025-09-10 02:51:21.227199', 'step': 5683, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:51:21.280385', 'step': 5683, 'epoch': 3} +{'type': 'loss', 'content': 0.0005217741127125919, 'timestamp': '2025-09-10 02:51:21.287750', 'step': 5684, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:51:21.341218', 'step': 5684, 'epoch': 3} +{'type': 'loss', 'content': 0.001844012294895947, 'timestamp': '2025-09-10 02:51:21.343400', 'step': 5685, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:51:21.395989', 'step': 5685, 'epoch': 3} +{'type': 'loss', 'content': 8.369120769202709e-05, 'timestamp': '2025-09-10 02:51:21.398951', 'step': 5686, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:51:21.459287', 'step': 5686, 'epoch': 3} +{'type': 'loss', 'content': 7.50988838262856e-05, 'timestamp': '2025-09-10 02:51:21.470016', 'step': 5687, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:51:21.522770', 'step': 5687, 'epoch': 3} +{'type': 'loss', 'content': 6.87357533024624e-05, 'timestamp': '2025-09-10 02:51:21.528669', 'step': 5688, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:51:21.580885', 'step': 5688, 'epoch': 3} +{'type': 'loss', 'content': 0.005220349412411451, 'timestamp': '2025-09-10 02:51:21.583009', 'step': 5689, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:51:21.635745', 'step': 5689, 'epoch': 3} +{'type': 'loss', 'content': 0.0001456657046219334, 'timestamp': '2025-09-10 02:51:21.638148', 'step': 5690, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:51:21.690952', 'step': 5690, 'epoch': 3} +{'type': 'loss', 'content': 0.0006287423893809319, 'timestamp': '2025-09-10 02:51:21.693497', 'step': 5691, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:51:21.750421', 'step': 5691, 'epoch': 3} +{'type': 'loss', 'content': 0.001782461884431541, 'timestamp': '2025-09-10 02:51:21.756555', 'step': 5692, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:51:21.809175', 'step': 5692, 'epoch': 3} +{'type': 'loss', 'content': 0.00010749736247817054, 'timestamp': '2025-09-10 02:51:21.811234', 'step': 5693, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:51:21.864425', 'step': 5693, 'epoch': 3} +{'type': 'loss', 'content': 6.93696565576829e-05, 'timestamp': '2025-09-10 02:51:21.871023', 'step': 5694, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:51:21.924083', 'step': 5694, 'epoch': 3} +{'type': 'loss', 'content': 4.0167022234527394e-05, 'timestamp': '2025-09-10 02:51:21.926285', 'step': 5695, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:51:21.979643', 'step': 5695, 'epoch': 3} +{'type': 'loss', 'content': 0.0001911646395456046, 'timestamp': '2025-09-10 02:51:21.988658', 'step': 5696, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:51:22.041071', 'step': 5696, 'epoch': 3} +{'type': 'loss', 'content': 0.00010535532783251256, 'timestamp': '2025-09-10 02:51:22.043441', 'step': 5697, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:51:22.096443', 'step': 5697, 'epoch': 3} +{'type': 'loss', 'content': 3.176342943334021e-05, 'timestamp': '2025-09-10 02:51:22.098924', 'step': 5698, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:51:22.151447', 'step': 5698, 'epoch': 3} +{'type': 'loss', 'content': 9.99109324766323e-05, 'timestamp': '2025-09-10 02:51:22.154006', 'step': 5699, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:51:22.206533', 'step': 5699, 'epoch': 3} +{'type': 'loss', 'content': 0.02412462793290615, 'timestamp': '2025-09-10 02:51:22.212476', 'step': 5700, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:51:22.264918', 'step': 5700, 'epoch': 3} +{'type': 'loss', 'content': 0.0009148859535343945, 'timestamp': '2025-09-10 02:51:22.267178', 'step': 5701, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:51:22.325933', 'step': 5701, 'epoch': 3} +{'type': 'loss', 'content': 0.0013624512357637286, 'timestamp': '2025-09-10 02:51:22.336382', 'step': 5702, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:51:22.389500', 'step': 5702, 'epoch': 3} +{'type': 'loss', 'content': 4.784575503435917e-05, 'timestamp': '2025-09-10 02:51:22.391735', 'step': 5703, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:51:22.445558', 'step': 5703, 'epoch': 3} +{'type': 'loss', 'content': 8.188021456589922e-05, 'timestamp': '2025-09-10 02:51:22.455967', 'step': 5704, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:51:22.509452', 'step': 5704, 'epoch': 3} +{'type': 'loss', 'content': 0.0016375510022044182, 'timestamp': '2025-09-10 02:51:22.511636', 'step': 5705, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:51:22.564630', 'step': 5705, 'epoch': 3} +{'type': 'loss', 'content': 4.4782795157516375e-05, 'timestamp': '2025-09-10 02:51:22.566879', 'step': 5706, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:51:22.619543', 'step': 5706, 'epoch': 3} +{'type': 'loss', 'content': 0.00040555463056080043, 'timestamp': '2025-09-10 02:51:22.621751', 'step': 5707, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:51:22.674404', 'step': 5707, 'epoch': 3} +{'type': 'loss', 'content': 0.0751379132270813, 'timestamp': '2025-09-10 02:51:22.679989', 'step': 5708, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:51:22.732222', 'step': 5708, 'epoch': 3} +{'type': 'loss', 'content': 2.4346985810552724e-05, 'timestamp': '2025-09-10 02:51:22.735254', 'step': 5709, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:51:22.787882', 'step': 5709, 'epoch': 3} +{'type': 'loss', 'content': 0.00011057908704970032, 'timestamp': '2025-09-10 02:51:22.790925', 'step': 5710, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:51:22.844495', 'step': 5710, 'epoch': 3} +{'type': 'loss', 'content': 7.72880157455802e-05, 'timestamp': '2025-09-10 02:51:22.854142', 'step': 5711, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:51:22.907062', 'step': 5711, 'epoch': 3} +{'type': 'loss', 'content': 9.590354602551088e-05, 'timestamp': '2025-09-10 02:51:22.912790', 'step': 5712, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:51:22.966159', 'step': 5712, 'epoch': 3} +{'type': 'loss', 'content': 9.515549027128145e-05, 'timestamp': '2025-09-10 02:51:22.968599', 'step': 5713, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:51:23.023147', 'step': 5713, 'epoch': 3} +{'type': 'loss', 'content': 0.00010020162881119177, 'timestamp': '2025-09-10 02:51:23.032937', 'step': 5714, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:51:23.085893', 'step': 5714, 'epoch': 3} +{'type': 'loss', 'content': 0.0007063016528263688, 'timestamp': '2025-09-10 02:51:23.088229', 'step': 5715, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:51:23.142211', 'step': 5715, 'epoch': 3} +{'type': 'loss', 'content': 0.003517144825309515, 'timestamp': '2025-09-10 02:51:23.152621', 'step': 5716, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:51:23.205394', 'step': 5716, 'epoch': 3} +{'type': 'loss', 'content': 0.05250024423003197, 'timestamp': '2025-09-10 02:51:23.207683', 'step': 5717, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:51:23.260699', 'step': 5717, 'epoch': 3} +{'type': 'loss', 'content': 0.010178987868130207, 'timestamp': '2025-09-10 02:51:23.267267', 'step': 5718, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:51:23.320642', 'step': 5718, 'epoch': 3} +{'type': 'loss', 'content': 8.096639066934586e-05, 'timestamp': '2025-09-10 02:51:23.327045', 'step': 5719, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:51:23.380055', 'step': 5719, 'epoch': 3} +{'type': 'loss', 'content': 0.0007258725236169994, 'timestamp': '2025-09-10 02:51:23.385640', 'step': 5720, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:51:23.437504', 'step': 5720, 'epoch': 3} +{'type': 'loss', 'content': 8.05341187515296e-05, 'timestamp': '2025-09-10 02:51:23.439634', 'step': 5721, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:51:23.492589', 'step': 5721, 'epoch': 3} +{'type': 'loss', 'content': 0.00031913447310216725, 'timestamp': '2025-09-10 02:51:23.494767', 'step': 5722, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:51:23.548585', 'step': 5722, 'epoch': 3} +{'type': 'loss', 'content': 0.004898604936897755, 'timestamp': '2025-09-10 02:51:23.550777', 'step': 5723, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:51:23.603890', 'step': 5723, 'epoch': 3} +{'type': 'loss', 'content': 0.0018130905227735639, 'timestamp': '2025-09-10 02:51:23.611716', 'step': 5724, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:51:23.663944', 'step': 5724, 'epoch': 3} +{'type': 'loss', 'content': 0.00022090536367613822, 'timestamp': '2025-09-10 02:51:23.670662', 'step': 5725, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:51:23.723821', 'step': 5725, 'epoch': 3} +{'type': 'loss', 'content': 0.000243786969804205, 'timestamp': '2025-09-10 02:51:23.725975', 'step': 5726, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:51:23.778599', 'step': 5726, 'epoch': 3} +{'type': 'loss', 'content': 0.00016002205666154623, 'timestamp': '2025-09-10 02:51:23.780910', 'step': 5727, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:51:23.833318', 'step': 5727, 'epoch': 3} +{'type': 'loss', 'content': 0.0002175274130422622, 'timestamp': '2025-09-10 02:51:23.838884', 'step': 5728, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:51:23.891221', 'step': 5728, 'epoch': 3} +{'type': 'loss', 'content': 9.111921099247411e-05, 'timestamp': '2025-09-10 02:51:23.899688', 'step': 5729, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:51:23.952124', 'step': 5729, 'epoch': 3} +{'type': 'loss', 'content': 0.0296839140355587, 'timestamp': '2025-09-10 02:51:23.955288', 'step': 5730, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:51:24.009295', 'step': 5730, 'epoch': 3} +{'type': 'loss', 'content': 0.001510259578935802, 'timestamp': '2025-09-10 02:51:24.011600', 'step': 5731, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:51:24.064918', 'step': 5731, 'epoch': 3} +{'type': 'loss', 'content': 0.0001774175325408578, 'timestamp': '2025-09-10 02:51:24.073732', 'step': 5732, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-09-10 02:51:24.140523', 'step': 5732, 'epoch': 3} +{'type': 'loss', 'content': 0.013746106065809727, 'timestamp': '2025-09-10 02:51:24.154142', 'step': 5733, 'epoch': 3} +{'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 736], 'batch_size': 8, 'flops': 14691612894976}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10859018244352}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 624], 'batch_size': 8, 'flops': 12455932682112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11497784019456}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8623338031488}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8623338031488}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10859018244352}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 656], 'batch_size': 8, 'flops': 13094698457216}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 560], 'batch_size': 8, 'flops': 11178401131904}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11497784019456}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 1168], 'batch_size': 8, 'flops': 23314950858880}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 640], 'batch_size': 8, 'flops': 12775315569664}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [6, 208], 'batch_size': 8, 'flops': 4151977605760}], 'timestamp': '2025-09-10 02:51:41.046821', 'step': 5733, 'epoch': 3} +{'type': 'pplx', 'content': 20482719.02242213, 'timestamp': '2025-09-10 02:51:41.049810', 'step': 5733, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:51:41.103976', 'step': 5733, 'epoch': 3} +{'type': 'loss', 'content': 0.010244191624224186, 'timestamp': '2025-09-10 02:51:41.106004', 'step': 5734, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:51:41.159201', 'step': 5734, 'epoch': 3} +{'type': 'loss', 'content': 0.0009839391568675637, 'timestamp': '2025-09-10 02:51:41.161433', 'step': 5735, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:51:41.214910', 'step': 5735, 'epoch': 3} +{'type': 'loss', 'content': 0.0067941914312541485, 'timestamp': '2025-09-10 02:51:41.220980', 'step': 5736, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:51:41.274429', 'step': 5736, 'epoch': 3} +{'type': 'loss', 'content': 0.0013374501140788198, 'timestamp': '2025-09-10 02:51:41.284929', 'step': 5737, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-09-10 02:51:41.346637', 'step': 5737, 'epoch': 3} +{'type': 'loss', 'content': 0.01822042651474476, 'timestamp': '2025-09-10 02:51:41.357665', 'step': 5738, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-09-10 02:51:41.425500', 'step': 5738, 'epoch': 3} +{'type': 'loss', 'content': 4.684946179622784e-05, 'timestamp': '2025-09-10 02:51:41.438064', 'step': 5739, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:51:41.496007', 'step': 5739, 'epoch': 3} +{'type': 'loss', 'content': 0.015893857926130295, 'timestamp': '2025-09-10 02:51:41.507250', 'step': 5740, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:51:41.559909', 'step': 5740, 'epoch': 3} +{'type': 'loss', 'content': 0.00013877787569072098, 'timestamp': '2025-09-10 02:51:41.569894', 'step': 5741, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-09-10 02:51:41.631171', 'step': 5741, 'epoch': 3} +{'type': 'loss', 'content': 0.0012965899659320712, 'timestamp': '2025-09-10 02:51:41.642291', 'step': 5742, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:51:41.695033', 'step': 5742, 'epoch': 3} +{'type': 'loss', 'content': 0.00033993370016105473, 'timestamp': '2025-09-10 02:51:41.703273', 'step': 5743, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:51:41.756556', 'step': 5743, 'epoch': 3} +{'type': 'loss', 'content': 0.00015266095579136163, 'timestamp': '2025-09-10 02:51:41.763776', 'step': 5744, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:51:41.815884', 'step': 5744, 'epoch': 3} +{'type': 'loss', 'content': 0.0008721463964320719, 'timestamp': '2025-09-10 02:51:41.817790', 'step': 5745, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:51:41.870584', 'step': 5745, 'epoch': 3} +{'type': 'loss', 'content': 0.0016237528761848807, 'timestamp': '2025-09-10 02:51:41.872772', 'step': 5746, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:51:41.925005', 'step': 5746, 'epoch': 3} +{'type': 'loss', 'content': 0.0007079765782691538, 'timestamp': '2025-09-10 02:51:41.927049', 'step': 5747, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:51:41.979407', 'step': 5747, 'epoch': 3} +{'type': 'loss', 'content': 0.002578176325187087, 'timestamp': '2025-09-10 02:51:41.985096', 'step': 5748, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:51:42.037779', 'step': 5748, 'epoch': 3} +{'type': 'loss', 'content': 0.0005948370671831071, 'timestamp': '2025-09-10 02:51:42.039973', 'step': 5749, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:51:42.092453', 'step': 5749, 'epoch': 3} +{'type': 'loss', 'content': 0.0009538509184494615, 'timestamp': '2025-09-10 02:51:42.094560', 'step': 5750, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:51:42.149335', 'step': 5750, 'epoch': 3} +{'type': 'loss', 'content': 0.001113768434152007, 'timestamp': '2025-09-10 02:51:42.159146', 'step': 5751, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:51:42.211904', 'step': 5751, 'epoch': 3} +{'type': 'loss', 'content': 3.385928357602097e-05, 'timestamp': '2025-09-10 02:51:42.217605', 'step': 5752, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:51:42.269610', 'step': 5752, 'epoch': 3} +{'type': 'loss', 'content': 0.002059690887108445, 'timestamp': '2025-09-10 02:51:42.271561', 'step': 5753, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:51:42.325161', 'step': 5753, 'epoch': 3} +{'type': 'loss', 'content': 0.014517420902848244, 'timestamp': '2025-09-10 02:51:42.334556', 'step': 5754, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-09-10 02:51:42.395940', 'step': 5754, 'epoch': 3} +{'type': 'loss', 'content': 0.0003307890146970749, 'timestamp': '2025-09-10 02:51:42.407049', 'step': 5755, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:51:42.460336', 'step': 5755, 'epoch': 3} +{'type': 'loss', 'content': 0.0009655895410105586, 'timestamp': '2025-09-10 02:51:42.467548', 'step': 5756, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:51:42.519814', 'step': 5756, 'epoch': 3} +{'type': 'loss', 'content': 0.0009693386382423341, 'timestamp': '2025-09-10 02:51:42.521901', 'step': 5757, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:51:42.574571', 'step': 5757, 'epoch': 3} +{'type': 'loss', 'content': 0.0013732363004237413, 'timestamp': '2025-09-10 02:51:42.576743', 'step': 5758, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:51:42.629751', 'step': 5758, 'epoch': 3} +{'type': 'loss', 'content': 0.00044860862544737756, 'timestamp': '2025-09-10 02:51:42.639308', 'step': 5759, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-09-10 02:51:42.707895', 'step': 5759, 'epoch': 3} +{'type': 'loss', 'content': 0.0012755978386849165, 'timestamp': '2025-09-10 02:51:42.721355', 'step': 5760, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:51:42.773804', 'step': 5760, 'epoch': 3} +{'type': 'loss', 'content': 0.00041840082849375904, 'timestamp': '2025-09-10 02:51:42.775872', 'step': 5761, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:51:42.828350', 'step': 5761, 'epoch': 3} +{'type': 'loss', 'content': 0.05786401778459549, 'timestamp': '2025-09-10 02:51:42.830559', 'step': 5762, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:51:42.883887', 'step': 5762, 'epoch': 3} +{'type': 'loss', 'content': 0.0017297634622082114, 'timestamp': '2025-09-10 02:51:42.893469', 'step': 5763, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:51:42.947075', 'step': 5763, 'epoch': 3} +{'type': 'loss', 'content': 0.0013945087557658553, 'timestamp': '2025-09-10 02:51:42.952714', 'step': 5764, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:51:43.004534', 'step': 5764, 'epoch': 3} +{'type': 'loss', 'content': 0.0003477123682387173, 'timestamp': '2025-09-10 02:51:43.011146', 'step': 5765, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:51:43.069471', 'step': 5765, 'epoch': 3} +{'type': 'loss', 'content': 0.0005236553261056542, 'timestamp': '2025-09-10 02:51:43.079899', 'step': 5766, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:51:43.133118', 'step': 5766, 'epoch': 3} +{'type': 'loss', 'content': 0.0005685584037564695, 'timestamp': '2025-09-10 02:51:43.135571', 'step': 5767, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:51:43.193569', 'step': 5767, 'epoch': 3} +{'type': 'loss', 'content': 0.002259068423882127, 'timestamp': '2025-09-10 02:51:43.204770', 'step': 5768, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:51:43.257614', 'step': 5768, 'epoch': 3} +{'type': 'loss', 'content': 0.0032246175687760115, 'timestamp': '2025-09-10 02:51:43.264146', 'step': 5769, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:51:43.316972', 'step': 5769, 'epoch': 3} +{'type': 'loss', 'content': 0.004127685911953449, 'timestamp': '2025-09-10 02:51:43.319926', 'step': 5770, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:51:43.372386', 'step': 5770, 'epoch': 3} +{'type': 'loss', 'content': 0.001799857011064887, 'timestamp': '2025-09-10 02:51:43.375441', 'step': 5771, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:51:43.428120', 'step': 5771, 'epoch': 3} +{'type': 'loss', 'content': 0.0010488808620721102, 'timestamp': '2025-09-10 02:51:43.433913', 'step': 5772, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:51:43.485775', 'step': 5772, 'epoch': 3} +{'type': 'loss', 'content': 0.0012435141252353787, 'timestamp': '2025-09-10 02:51:43.488814', 'step': 5773, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:51:43.541380', 'step': 5773, 'epoch': 3} +{'type': 'loss', 'content': 0.0005501917912624776, 'timestamp': '2025-09-10 02:51:43.544522', 'step': 5774, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:51:43.596879', 'step': 5774, 'epoch': 3} +{'type': 'loss', 'content': 0.0003421418077778071, 'timestamp': '2025-09-10 02:51:43.599106', 'step': 5775, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:51:43.659216', 'step': 5775, 'epoch': 3} +{'type': 'loss', 'content': 0.0022140939254313707, 'timestamp': '2025-09-10 02:51:43.670706', 'step': 5776, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:51:43.722932', 'step': 5776, 'epoch': 3} +{'type': 'loss', 'content': 0.009579942561686039, 'timestamp': '2025-09-10 02:51:43.729525', 'step': 5777, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 480], 'flops': 9600058345344.0}, 'timestamp': '2025-09-10 02:51:43.802715', 'step': 5777, 'epoch': 3} +{'type': 'loss', 'content': 0.00020193222735542804, 'timestamp': '2025-09-10 02:51:43.816434', 'step': 5778, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:51:43.869534', 'step': 5778, 'epoch': 3} +{'type': 'loss', 'content': 0.025681912899017334, 'timestamp': '2025-09-10 02:51:43.871568', 'step': 5779, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:51:43.924417', 'step': 5779, 'epoch': 3} +{'type': 'loss', 'content': 0.001249152235686779, 'timestamp': '2025-09-10 02:51:43.930121', 'step': 5780, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:51:43.982074', 'step': 5780, 'epoch': 3} +{'type': 'loss', 'content': 0.004935614764690399, 'timestamp': '2025-09-10 02:51:43.984271', 'step': 5781, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:51:44.037775', 'step': 5781, 'epoch': 3} +{'type': 'loss', 'content': 0.0002590807562228292, 'timestamp': '2025-09-10 02:51:44.044372', 'step': 5782, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:51:44.097589', 'step': 5782, 'epoch': 3} +{'type': 'loss', 'content': 0.0007769867079332471, 'timestamp': '2025-09-10 02:51:44.100096', 'step': 5783, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:51:44.152771', 'step': 5783, 'epoch': 3} +{'type': 'loss', 'content': 0.0003105894138570875, 'timestamp': '2025-09-10 02:51:44.158475', 'step': 5784, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:51:44.210459', 'step': 5784, 'epoch': 3} +{'type': 'loss', 'content': 0.044892568141222, 'timestamp': '2025-09-10 02:51:44.217239', 'step': 5785, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:51:44.269678', 'step': 5785, 'epoch': 3} +{'type': 'loss', 'content': 0.003026153426617384, 'timestamp': '2025-09-10 02:51:44.272761', 'step': 5786, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:51:44.324778', 'step': 5786, 'epoch': 3} +{'type': 'loss', 'content': 0.0016270544147118926, 'timestamp': '2025-09-10 02:51:44.327957', 'step': 5787, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:51:44.380295', 'step': 5787, 'epoch': 3} +{'type': 'loss', 'content': 0.009035581722855568, 'timestamp': '2025-09-10 02:51:44.385855', 'step': 5788, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:51:44.437620', 'step': 5788, 'epoch': 3} +{'type': 'loss', 'content': 0.004763353615999222, 'timestamp': '2025-09-10 02:51:44.439661', 'step': 5789, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:51:44.492022', 'step': 5789, 'epoch': 3} +{'type': 'loss', 'content': 0.00015156091831158847, 'timestamp': '2025-09-10 02:51:44.494897', 'step': 5790, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:51:44.551052', 'step': 5790, 'epoch': 3} +{'type': 'loss', 'content': 0.00022842036560177803, 'timestamp': '2025-09-10 02:51:44.553188', 'step': 5791, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:51:44.605894', 'step': 5791, 'epoch': 3} +{'type': 'loss', 'content': 0.00035799542092718184, 'timestamp': '2025-09-10 02:51:44.611459', 'step': 5792, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:51:44.663067', 'step': 5792, 'epoch': 3} +{'type': 'loss', 'content': 0.0005225968197919428, 'timestamp': '2025-09-10 02:51:44.666051', 'step': 5793, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:51:44.718390', 'step': 5793, 'epoch': 3} +{'type': 'loss', 'content': 0.00023316919396165758, 'timestamp': '2025-09-10 02:51:44.721696', 'step': 5794, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:51:44.773877', 'step': 5794, 'epoch': 3} +{'type': 'loss', 'content': 0.0001692460646154359, 'timestamp': '2025-09-10 02:51:44.775993', 'step': 5795, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-09-10 02:51:44.836577', 'step': 5795, 'epoch': 3} +{'type': 'loss', 'content': 0.00017555321392137557, 'timestamp': '2025-09-10 02:51:44.848291', 'step': 5796, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:51:44.900769', 'step': 5796, 'epoch': 3} +{'type': 'loss', 'content': 0.00014712891425006092, 'timestamp': '2025-09-10 02:51:44.903779', 'step': 5797, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:51:44.956480', 'step': 5797, 'epoch': 3} +{'type': 'loss', 'content': 0.0004672048962675035, 'timestamp': '2025-09-10 02:51:44.958793', 'step': 5798, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:51:45.011724', 'step': 5798, 'epoch': 3} +{'type': 'loss', 'content': 0.0013020833721384406, 'timestamp': '2025-09-10 02:51:45.019950', 'step': 5799, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:51:45.072629', 'step': 5799, 'epoch': 3} +{'type': 'loss', 'content': 0.00015009155322331935, 'timestamp': '2025-09-10 02:51:45.078530', 'step': 5800, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:51:45.130705', 'step': 5800, 'epoch': 3} +{'type': 'loss', 'content': 0.0018723929533734918, 'timestamp': '2025-09-10 02:51:45.138976', 'step': 5801, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:51:45.192391', 'step': 5801, 'epoch': 3} +{'type': 'loss', 'content': 0.0036644097417593002, 'timestamp': '2025-09-10 02:51:45.199130', 'step': 5802, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:51:45.252770', 'step': 5802, 'epoch': 3} +{'type': 'loss', 'content': 0.00022594413894694299, 'timestamp': '2025-09-10 02:51:45.255057', 'step': 5803, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:51:45.307878', 'step': 5803, 'epoch': 3} +{'type': 'loss', 'content': 0.0008001906680874527, 'timestamp': '2025-09-10 02:51:45.316929', 'step': 5804, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:51:45.369507', 'step': 5804, 'epoch': 3} +{'type': 'loss', 'content': 0.0003338546375744045, 'timestamp': '2025-09-10 02:51:45.371692', 'step': 5805, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:51:45.424449', 'step': 5805, 'epoch': 3} +{'type': 'loss', 'content': 0.000219851266592741, 'timestamp': '2025-09-10 02:51:45.432846', 'step': 5806, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:51:45.486628', 'step': 5806, 'epoch': 3} +{'type': 'loss', 'content': 0.0014241269091144204, 'timestamp': '2025-09-10 02:51:45.496215', 'step': 5807, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:51:45.549146', 'step': 5807, 'epoch': 3} +{'type': 'loss', 'content': 0.002949876943603158, 'timestamp': '2025-09-10 02:51:45.555014', 'step': 5808, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:51:45.607198', 'step': 5808, 'epoch': 3} +{'type': 'loss', 'content': 0.0029255712870508432, 'timestamp': '2025-09-10 02:51:45.613961', 'step': 5809, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:51:45.667230', 'step': 5809, 'epoch': 3} +{'type': 'loss', 'content': 0.00041943046380765736, 'timestamp': '2025-09-10 02:51:45.669664', 'step': 5810, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:51:45.722893', 'step': 5810, 'epoch': 3} +{'type': 'loss', 'content': 0.0004880022897850722, 'timestamp': '2025-09-10 02:51:45.725160', 'step': 5811, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:51:45.779473', 'step': 5811, 'epoch': 3} +{'type': 'loss', 'content': 0.0005901859840378165, 'timestamp': '2025-09-10 02:51:45.790075', 'step': 5812, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-09-10 02:51:45.856681', 'step': 5812, 'epoch': 3} +{'type': 'loss', 'content': 0.00024833696079440415, 'timestamp': '2025-09-10 02:51:45.869943', 'step': 5813, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:51:45.923333', 'step': 5813, 'epoch': 3} +{'type': 'loss', 'content': 0.0002008928422583267, 'timestamp': '2025-09-10 02:51:45.925339', 'step': 5814, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:51:45.978660', 'step': 5814, 'epoch': 3} +{'type': 'loss', 'content': 0.0006987524102441967, 'timestamp': '2025-09-10 02:51:45.981529', 'step': 5815, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:51:46.034014', 'step': 5815, 'epoch': 3} +{'type': 'loss', 'content': 0.0054719215258955956, 'timestamp': '2025-09-10 02:51:46.039949', 'step': 5816, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:51:46.096034', 'step': 5816, 'epoch': 3} +{'type': 'loss', 'content': 0.001059025409631431, 'timestamp': '2025-09-10 02:51:46.107300', 'step': 5817, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:51:46.160328', 'step': 5817, 'epoch': 3} +{'type': 'loss', 'content': 0.0013769164215773344, 'timestamp': '2025-09-10 02:51:46.166903', 'step': 5818, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:51:46.220236', 'step': 5818, 'epoch': 3} +{'type': 'loss', 'content': 0.005804088432341814, 'timestamp': '2025-09-10 02:51:46.222536', 'step': 5819, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:51:46.275048', 'step': 5819, 'epoch': 3} +{'type': 'loss', 'content': 0.00022570193686988205, 'timestamp': '2025-09-10 02:51:46.281015', 'step': 5820, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:51:46.339754', 'step': 5820, 'epoch': 3} +{'type': 'loss', 'content': 0.0003626677207648754, 'timestamp': '2025-09-10 02:51:46.351214', 'step': 5821, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:51:46.405330', 'step': 5821, 'epoch': 3} +{'type': 'loss', 'content': 0.00018641487986315042, 'timestamp': '2025-09-10 02:51:46.414379', 'step': 5822, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:51:46.468736', 'step': 5822, 'epoch': 3} +{'type': 'loss', 'content': 0.0007334387628361583, 'timestamp': '2025-09-10 02:51:46.471170', 'step': 5823, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:51:46.525196', 'step': 5823, 'epoch': 3} +{'type': 'loss', 'content': 0.00023691621026955545, 'timestamp': '2025-09-10 02:51:46.531365', 'step': 5824, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:51:46.584010', 'step': 5824, 'epoch': 3} +{'type': 'loss', 'content': 0.0002022029075305909, 'timestamp': '2025-09-10 02:51:46.586148', 'step': 5825, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:51:46.639045', 'step': 5825, 'epoch': 3} +{'type': 'loss', 'content': 0.030149130150675774, 'timestamp': '2025-09-10 02:51:46.641222', 'step': 5826, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:51:46.693774', 'step': 5826, 'epoch': 3} +{'type': 'loss', 'content': 0.00018455949611961842, 'timestamp': '2025-09-10 02:51:46.695778', 'step': 5827, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:51:46.748531', 'step': 5827, 'epoch': 3} +{'type': 'loss', 'content': 0.0003206153050996363, 'timestamp': '2025-09-10 02:51:46.754360', 'step': 5828, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:51:46.806908', 'step': 5828, 'epoch': 3} +{'type': 'loss', 'content': 0.0010543627431616187, 'timestamp': '2025-09-10 02:51:46.809167', 'step': 5829, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:51:46.862050', 'step': 5829, 'epoch': 3} +{'type': 'loss', 'content': 0.0002578561834525317, 'timestamp': '2025-09-10 02:51:46.865017', 'step': 5830, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:51:46.920190', 'step': 5830, 'epoch': 3} +{'type': 'loss', 'content': 0.0006444288301281631, 'timestamp': '2025-09-10 02:51:46.922352', 'step': 5831, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:51:46.975669', 'step': 5831, 'epoch': 3} +{'type': 'loss', 'content': 0.0003105739306192845, 'timestamp': '2025-09-10 02:51:46.981620', 'step': 5832, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:51:47.033950', 'step': 5832, 'epoch': 3} +{'type': 'loss', 'content': 0.003395001171156764, 'timestamp': '2025-09-10 02:51:47.035946', 'step': 5833, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:51:47.089245', 'step': 5833, 'epoch': 3} +{'type': 'loss', 'content': 0.0013299237471073866, 'timestamp': '2025-09-10 02:51:47.092087', 'step': 5834, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:51:47.146140', 'step': 5834, 'epoch': 3} +{'type': 'loss', 'content': 0.00017916594515554607, 'timestamp': '2025-09-10 02:51:47.148319', 'step': 5835, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:51:47.201356', 'step': 5835, 'epoch': 3} +{'type': 'loss', 'content': 0.008848682045936584, 'timestamp': '2025-09-10 02:51:47.207280', 'step': 5836, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:51:47.259684', 'step': 5836, 'epoch': 3} +{'type': 'loss', 'content': 0.0012626085663214326, 'timestamp': '2025-09-10 02:51:47.262495', 'step': 5837, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:51:47.314931', 'step': 5837, 'epoch': 3} +{'type': 'loss', 'content': 0.00020191511430311948, 'timestamp': '2025-09-10 02:51:47.317975', 'step': 5838, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:51:47.375622', 'step': 5838, 'epoch': 3} +{'type': 'loss', 'content': 0.0024575458373874426, 'timestamp': '2025-09-10 02:51:47.386055', 'step': 5839, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:51:47.439173', 'step': 5839, 'epoch': 3} +{'type': 'loss', 'content': 0.0005852826288901269, 'timestamp': '2025-09-10 02:51:47.445096', 'step': 5840, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:51:47.496952', 'step': 5840, 'epoch': 3} +{'type': 'loss', 'content': 0.0016624733107164502, 'timestamp': '2025-09-10 02:51:47.499088', 'step': 5841, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:51:47.552019', 'step': 5841, 'epoch': 3} +{'type': 'loss', 'content': 0.0002822458336595446, 'timestamp': '2025-09-10 02:51:47.554121', 'step': 5842, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:51:47.613034', 'step': 5842, 'epoch': 3} +{'type': 'loss', 'content': 0.0001824253995437175, 'timestamp': '2025-09-10 02:51:47.623349', 'step': 5843, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:51:47.677365', 'step': 5843, 'epoch': 3} +{'type': 'loss', 'content': 0.00014550441119354218, 'timestamp': '2025-09-10 02:51:47.683325', 'step': 5844, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:51:47.735165', 'step': 5844, 'epoch': 3} +{'type': 'loss', 'content': 0.003948107361793518, 'timestamp': '2025-09-10 02:51:47.741668', 'step': 5845, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:51:47.794560', 'step': 5845, 'epoch': 3} +{'type': 'loss', 'content': 0.0008949020993895829, 'timestamp': '2025-09-10 02:51:47.796791', 'step': 5846, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:51:47.849535', 'step': 5846, 'epoch': 3} +{'type': 'loss', 'content': 0.002585839480161667, 'timestamp': '2025-09-10 02:51:47.851836', 'step': 5847, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:51:47.905040', 'step': 5847, 'epoch': 3} +{'type': 'loss', 'content': 0.00041013467125594616, 'timestamp': '2025-09-10 02:51:47.910692', 'step': 5848, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:51:47.962702', 'step': 5848, 'epoch': 3} +{'type': 'loss', 'content': 0.01508873701095581, 'timestamp': '2025-09-10 02:51:47.964942', 'step': 5849, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:51:48.018000', 'step': 5849, 'epoch': 3} +{'type': 'loss', 'content': 0.001222583232447505, 'timestamp': '2025-09-10 02:51:48.019983', 'step': 5850, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-09-10 02:51:48.086194', 'step': 5850, 'epoch': 3} +{'type': 'loss', 'content': 0.0003196625621058047, 'timestamp': '2025-09-10 02:51:48.098443', 'step': 5851, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:51:48.151415', 'step': 5851, 'epoch': 3} +{'type': 'loss', 'content': 0.0007270271889865398, 'timestamp': '2025-09-10 02:51:48.157035', 'step': 5852, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:51:48.215674', 'step': 5852, 'epoch': 3} +{'type': 'loss', 'content': 0.0029898055363446474, 'timestamp': '2025-09-10 02:51:48.227258', 'step': 5853, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:51:48.279675', 'step': 5853, 'epoch': 3} +{'type': 'loss', 'content': 7.705479947617278e-05, 'timestamp': '2025-09-10 02:51:48.281986', 'step': 5854, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:51:48.334896', 'step': 5854, 'epoch': 3} +{'type': 'loss', 'content': 0.006672864314168692, 'timestamp': '2025-09-10 02:51:48.337836', 'step': 5855, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:51:48.390988', 'step': 5855, 'epoch': 3} +{'type': 'loss', 'content': 0.0005213103140704334, 'timestamp': '2025-09-10 02:51:48.399753', 'step': 5856, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:51:48.453427', 'step': 5856, 'epoch': 3} +{'type': 'loss', 'content': 0.0009325035498477519, 'timestamp': '2025-09-10 02:51:48.455548', 'step': 5857, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:51:48.508801', 'step': 5857, 'epoch': 3} +{'type': 'loss', 'content': 0.005977279506623745, 'timestamp': '2025-09-10 02:51:48.511361', 'step': 5858, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:51:48.564658', 'step': 5858, 'epoch': 3} +{'type': 'loss', 'content': 5.17429034516681e-05, 'timestamp': '2025-09-10 02:51:48.566855', 'step': 5859, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:51:48.619593', 'step': 5859, 'epoch': 3} +{'type': 'loss', 'content': 0.00032618644763715565, 'timestamp': '2025-09-10 02:51:48.625760', 'step': 5860, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:51:48.678549', 'step': 5860, 'epoch': 3} +{'type': 'loss', 'content': 9.650614083511755e-05, 'timestamp': '2025-09-10 02:51:48.680539', 'step': 5861, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:51:48.733581', 'step': 5861, 'epoch': 3} +{'type': 'loss', 'content': 0.003374809166416526, 'timestamp': '2025-09-10 02:51:48.735738', 'step': 5862, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:51:48.788804', 'step': 5862, 'epoch': 3} +{'type': 'loss', 'content': 0.004158054944127798, 'timestamp': '2025-09-10 02:51:48.791668', 'step': 5863, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:51:48.845069', 'step': 5863, 'epoch': 3} +{'type': 'loss', 'content': 0.0018565886421129107, 'timestamp': '2025-09-10 02:51:48.850818', 'step': 5864, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:51:48.903807', 'step': 5864, 'epoch': 3} +{'type': 'loss', 'content': 0.0003595920279622078, 'timestamp': '2025-09-10 02:51:48.906068', 'step': 5865, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:51:48.959243', 'step': 5865, 'epoch': 3} +{'type': 'loss', 'content': 0.024408187717199326, 'timestamp': '2025-09-10 02:51:48.968864', 'step': 5866, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:51:49.022018', 'step': 5866, 'epoch': 3} +{'type': 'loss', 'content': 0.00012649646669160575, 'timestamp': '2025-09-10 02:51:49.024374', 'step': 5867, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:51:49.077918', 'step': 5867, 'epoch': 3} +{'type': 'loss', 'content': 8.017767686396837e-05, 'timestamp': '2025-09-10 02:51:49.084051', 'step': 5868, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:51:49.136551', 'step': 5868, 'epoch': 3} +{'type': 'loss', 'content': 0.0006468692445196211, 'timestamp': '2025-09-10 02:51:49.142987', 'step': 5869, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:51:49.195713', 'step': 5869, 'epoch': 3} +{'type': 'loss', 'content': 0.0004957037162967026, 'timestamp': '2025-09-10 02:51:49.198630', 'step': 5870, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:51:49.251648', 'step': 5870, 'epoch': 3} +{'type': 'loss', 'content': 0.00030178995802998543, 'timestamp': '2025-09-10 02:51:49.253868', 'step': 5871, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:51:49.306143', 'step': 5871, 'epoch': 3} +{'type': 'loss', 'content': 0.00027214823057875037, 'timestamp': '2025-09-10 02:51:49.311746', 'step': 5872, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:51:49.364033', 'step': 5872, 'epoch': 3} +{'type': 'loss', 'content': 0.001998594496399164, 'timestamp': '2025-09-10 02:51:49.365982', 'step': 5873, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:51:49.418353', 'step': 5873, 'epoch': 3} +{'type': 'loss', 'content': 0.0003163004294037819, 'timestamp': '2025-09-10 02:51:49.420458', 'step': 5874, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:51:49.473440', 'step': 5874, 'epoch': 3} +{'type': 'loss', 'content': 0.0031887299846857786, 'timestamp': '2025-09-10 02:51:49.480138', 'step': 5875, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:51:49.532629', 'step': 5875, 'epoch': 3} +{'type': 'loss', 'content': 0.0019040309125557542, 'timestamp': '2025-09-10 02:51:49.538223', 'step': 5876, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:51:49.589777', 'step': 5876, 'epoch': 3} +{'type': 'loss', 'content': 0.0017279988387599587, 'timestamp': '2025-09-10 02:51:49.592813', 'step': 5877, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:51:49.646174', 'step': 5877, 'epoch': 3} +{'type': 'loss', 'content': 0.00021360639948397875, 'timestamp': '2025-09-10 02:51:49.648274', 'step': 5878, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:51:49.701324', 'step': 5878, 'epoch': 3} +{'type': 'loss', 'content': 0.00016497794422321022, 'timestamp': '2025-09-10 02:51:49.703391', 'step': 5879, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:51:49.756052', 'step': 5879, 'epoch': 3} +{'type': 'loss', 'content': 0.0001751897216308862, 'timestamp': '2025-09-10 02:51:49.761695', 'step': 5880, 'epoch': 3} +{'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 736], 'batch_size': 8, 'flops': 14691612894976}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10859018244352}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 624], 'batch_size': 8, 'flops': 12455932682112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11497784019456}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8623338031488}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8623338031488}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10859018244352}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 656], 'batch_size': 8, 'flops': 13094698457216}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 560], 'batch_size': 8, 'flops': 11178401131904}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11497784019456}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 1168], 'batch_size': 8, 'flops': 23314950858880}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 640], 'batch_size': 8, 'flops': 12775315569664}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [6, 208], 'batch_size': 8, 'flops': 4151977605760}], 'timestamp': '2025-09-10 02:52:06.495393', 'step': 5880, 'epoch': 3} +{'type': 'pplx', 'content': 22582352.436606582, 'timestamp': '2025-09-10 02:52:06.498475', 'step': 5880, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:52:06.553499', 'step': 5880, 'epoch': 3} +{'type': 'loss', 'content': 0.0008500745752826333, 'timestamp': '2025-09-10 02:52:06.555694', 'step': 5881, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:52:06.609318', 'step': 5881, 'epoch': 3} +{'type': 'loss', 'content': 0.00237825489602983, 'timestamp': '2025-09-10 02:52:06.615373', 'step': 5882, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-09-10 02:52:06.683587', 'step': 5882, 'epoch': 3} +{'type': 'loss', 'content': 0.0004402727645356208, 'timestamp': '2025-09-10 02:52:06.696184', 'step': 5883, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:52:06.750171', 'step': 5883, 'epoch': 3} +{'type': 'loss', 'content': 0.00014934144564904273, 'timestamp': '2025-09-10 02:52:06.757019', 'step': 5884, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:52:06.814059', 'step': 5884, 'epoch': 3} +{'type': 'loss', 'content': 7.489151175832376e-05, 'timestamp': '2025-09-10 02:52:06.816864', 'step': 5885, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:52:06.879648', 'step': 5885, 'epoch': 3} +{'type': 'loss', 'content': 0.00014372929581440985, 'timestamp': '2025-09-10 02:52:06.889378', 'step': 5886, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:52:06.951898', 'step': 5886, 'epoch': 3} +{'type': 'loss', 'content': 0.00036258361069485545, 'timestamp': '2025-09-10 02:52:06.959600', 'step': 5887, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:52:07.017794', 'step': 5887, 'epoch': 3} +{'type': 'loss', 'content': 0.0001417129678884521, 'timestamp': '2025-09-10 02:52:07.024036', 'step': 5888, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-09-10 02:52:07.092241', 'step': 5888, 'epoch': 3} +{'type': 'loss', 'content': 0.01438497006893158, 'timestamp': '2025-09-10 02:52:07.106040', 'step': 5889, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:52:07.161488', 'step': 5889, 'epoch': 3} +{'type': 'loss', 'content': 0.00014281366020441055, 'timestamp': '2025-09-10 02:52:07.169058', 'step': 5890, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:52:07.225419', 'step': 5890, 'epoch': 3} +{'type': 'loss', 'content': 0.002134530106559396, 'timestamp': '2025-09-10 02:52:07.235247', 'step': 5891, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:52:07.292899', 'step': 5891, 'epoch': 3} +{'type': 'loss', 'content': 0.0002158203424187377, 'timestamp': '2025-09-10 02:52:07.298907', 'step': 5892, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:52:07.352829', 'step': 5892, 'epoch': 3} +{'type': 'loss', 'content': 6.779220711905509e-05, 'timestamp': '2025-09-10 02:52:07.355341', 'step': 5893, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:52:07.408285', 'step': 5893, 'epoch': 3} +{'type': 'loss', 'content': 0.0001536442432552576, 'timestamp': '2025-09-10 02:52:07.411415', 'step': 5894, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:52:07.469134', 'step': 5894, 'epoch': 3} +{'type': 'loss', 'content': 8.067933231359348e-05, 'timestamp': '2025-09-10 02:52:07.472237', 'step': 5895, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 512], 'flops': 10240062230528.0}, 'timestamp': '2025-09-10 02:52:07.555434', 'step': 5895, 'epoch': 3} +{'type': 'loss', 'content': 9.683582902653143e-05, 'timestamp': '2025-09-10 02:52:07.570327', 'step': 5896, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:52:07.627918', 'step': 5896, 'epoch': 3} +{'type': 'loss', 'content': 0.00027145136846229434, 'timestamp': '2025-09-10 02:52:07.636046', 'step': 5897, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:52:07.692103', 'step': 5897, 'epoch': 3} +{'type': 'loss', 'content': 7.75391745264642e-05, 'timestamp': '2025-09-10 02:52:07.694163', 'step': 5898, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:52:07.746973', 'step': 5898, 'epoch': 3} +{'type': 'loss', 'content': 0.006268133409321308, 'timestamp': '2025-09-10 02:52:07.749173', 'step': 5899, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:52:07.802796', 'step': 5899, 'epoch': 3} +{'type': 'loss', 'content': 0.0002908637106884271, 'timestamp': '2025-09-10 02:52:07.808926', 'step': 5900, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:52:07.862815', 'step': 5900, 'epoch': 3} +{'type': 'loss', 'content': 0.00019580138905439526, 'timestamp': '2025-09-10 02:52:07.869841', 'step': 5901, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:52:07.923100', 'step': 5901, 'epoch': 3} +{'type': 'loss', 'content': 0.0005077117239125073, 'timestamp': '2025-09-10 02:52:07.925125', 'step': 5902, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:52:07.978674', 'step': 5902, 'epoch': 3} +{'type': 'loss', 'content': 0.008855224587023258, 'timestamp': '2025-09-10 02:52:07.980861', 'step': 5903, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:52:08.033648', 'step': 5903, 'epoch': 3} +{'type': 'loss', 'content': 0.00028505848604254425, 'timestamp': '2025-09-10 02:52:08.039857', 'step': 5904, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:52:08.092268', 'step': 5904, 'epoch': 3} +{'type': 'loss', 'content': 0.00020344446238595992, 'timestamp': '2025-09-10 02:52:08.102285', 'step': 5905, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:52:08.155900', 'step': 5905, 'epoch': 3} +{'type': 'loss', 'content': 7.506937981816009e-05, 'timestamp': '2025-09-10 02:52:08.157942', 'step': 5906, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:52:08.216684', 'step': 5906, 'epoch': 3} +{'type': 'loss', 'content': 0.001403413130901754, 'timestamp': '2025-09-10 02:52:08.227107', 'step': 5907, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:52:08.280004', 'step': 5907, 'epoch': 3} +{'type': 'loss', 'content': 0.0032349787652492523, 'timestamp': '2025-09-10 02:52:08.285877', 'step': 5908, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 624], 'flops': 12480075828672.0}, 'timestamp': '2025-09-10 02:52:08.374380', 'step': 5908, 'epoch': 3} +{'type': 'loss', 'content': 0.0011857036733999848, 'timestamp': '2025-09-10 02:52:08.393369', 'step': 5909, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:52:08.447821', 'step': 5909, 'epoch': 3} +{'type': 'loss', 'content': 0.00016681969282217324, 'timestamp': '2025-09-10 02:52:08.454867', 'step': 5910, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:52:08.508893', 'step': 5910, 'epoch': 3} +{'type': 'loss', 'content': 0.0009027948835864663, 'timestamp': '2025-09-10 02:52:08.511575', 'step': 5911, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-09-10 02:52:08.577929', 'step': 5911, 'epoch': 3} +{'type': 'loss', 'content': 0.0007573505281470716, 'timestamp': '2025-09-10 02:52:08.590941', 'step': 5912, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:52:08.645856', 'step': 5912, 'epoch': 3} +{'type': 'loss', 'content': 0.0008393190801143646, 'timestamp': '2025-09-10 02:52:08.647978', 'step': 5913, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:52:08.702176', 'step': 5913, 'epoch': 3} +{'type': 'loss', 'content': 0.0008536014938727021, 'timestamp': '2025-09-10 02:52:08.704366', 'step': 5914, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:52:08.763158', 'step': 5914, 'epoch': 3} +{'type': 'loss', 'content': 0.0439261794090271, 'timestamp': '2025-09-10 02:52:08.773605', 'step': 5915, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 464], 'flops': 9280056402752.0}, 'timestamp': '2025-09-10 02:52:08.846260', 'step': 5915, 'epoch': 3} +{'type': 'loss', 'content': 0.0011814313475042582, 'timestamp': '2025-09-10 02:52:08.860489', 'step': 5916, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-09-10 02:52:08.921010', 'step': 5916, 'epoch': 3} +{'type': 'loss', 'content': 4.090380753041245e-05, 'timestamp': '2025-09-10 02:52:08.933005', 'step': 5917, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:52:08.986309', 'step': 5917, 'epoch': 3} +{'type': 'loss', 'content': 0.00017138940165750682, 'timestamp': '2025-09-10 02:52:08.988682', 'step': 5918, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:52:09.042107', 'step': 5918, 'epoch': 3} +{'type': 'loss', 'content': 0.01822705566883087, 'timestamp': '2025-09-10 02:52:09.051736', 'step': 5919, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:52:09.105232', 'step': 5919, 'epoch': 3} +{'type': 'loss', 'content': 0.0010350747033953667, 'timestamp': '2025-09-10 02:52:09.113830', 'step': 5920, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:52:09.167079', 'step': 5920, 'epoch': 3} +{'type': 'loss', 'content': 0.017399441450834274, 'timestamp': '2025-09-10 02:52:09.174906', 'step': 5921, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:52:09.228229', 'step': 5921, 'epoch': 3} +{'type': 'loss', 'content': 0.00032641488360241055, 'timestamp': '2025-09-10 02:52:09.237881', 'step': 5922, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:52:09.291607', 'step': 5922, 'epoch': 3} +{'type': 'loss', 'content': 0.00014491185720544308, 'timestamp': '2025-09-10 02:52:09.294051', 'step': 5923, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:52:09.347443', 'step': 5923, 'epoch': 3} +{'type': 'loss', 'content': 0.000394363421946764, 'timestamp': '2025-09-10 02:52:09.353898', 'step': 5924, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:52:09.406558', 'step': 5924, 'epoch': 3} +{'type': 'loss', 'content': 0.0008359685307368636, 'timestamp': '2025-09-10 02:52:09.412980', 'step': 5925, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:52:09.466159', 'step': 5925, 'epoch': 3} +{'type': 'loss', 'content': 0.00016688613686710596, 'timestamp': '2025-09-10 02:52:09.468323', 'step': 5926, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:52:09.521687', 'step': 5926, 'epoch': 3} +{'type': 'loss', 'content': 0.005174115300178528, 'timestamp': '2025-09-10 02:52:09.523924', 'step': 5927, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:52:09.576650', 'step': 5927, 'epoch': 3} +{'type': 'loss', 'content': 0.0033108533825725317, 'timestamp': '2025-09-10 02:52:09.582673', 'step': 5928, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:52:09.634968', 'step': 5928, 'epoch': 3} +{'type': 'loss', 'content': 0.0003466351772658527, 'timestamp': '2025-09-10 02:52:09.641489', 'step': 5929, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:52:09.694949', 'step': 5929, 'epoch': 3} +{'type': 'loss', 'content': 0.00012293375039007515, 'timestamp': '2025-09-10 02:52:09.701451', 'step': 5930, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:52:09.755155', 'step': 5930, 'epoch': 3} +{'type': 'loss', 'content': 0.0008823301759548485, 'timestamp': '2025-09-10 02:52:09.757458', 'step': 5931, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:52:09.811106', 'step': 5931, 'epoch': 3} +{'type': 'loss', 'content': 0.0021824785508215427, 'timestamp': '2025-09-10 02:52:09.817250', 'step': 5932, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:52:09.870171', 'step': 5932, 'epoch': 3} +{'type': 'loss', 'content': 0.0024673170410096645, 'timestamp': '2025-09-10 02:52:09.872249', 'step': 5933, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:52:09.925172', 'step': 5933, 'epoch': 3} +{'type': 'loss', 'content': 0.033712733536958694, 'timestamp': '2025-09-10 02:52:09.927398', 'step': 5934, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:52:09.979801', 'step': 5934, 'epoch': 3} +{'type': 'loss', 'content': 0.00021428009495139122, 'timestamp': '2025-09-10 02:52:09.982214', 'step': 5935, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:52:10.034957', 'step': 5935, 'epoch': 3} +{'type': 'loss', 'content': 3.840552381007001e-05, 'timestamp': '2025-09-10 02:52:10.040982', 'step': 5936, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:52:10.093895', 'step': 5936, 'epoch': 3} +{'type': 'loss', 'content': 0.00015409817569889128, 'timestamp': '2025-09-10 02:52:10.096123', 'step': 5937, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:52:10.149800', 'step': 5937, 'epoch': 3} +{'type': 'loss', 'content': 0.00022064820223022252, 'timestamp': '2025-09-10 02:52:10.159398', 'step': 5938, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:52:10.212968', 'step': 5938, 'epoch': 3} +{'type': 'loss', 'content': 0.024093538522720337, 'timestamp': '2025-09-10 02:52:10.215126', 'step': 5939, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:52:10.269070', 'step': 5939, 'epoch': 3} +{'type': 'loss', 'content': 0.002549313474446535, 'timestamp': '2025-09-10 02:52:10.279477', 'step': 5940, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:52:10.333409', 'step': 5940, 'epoch': 3} +{'type': 'loss', 'content': 0.00023986550513654947, 'timestamp': '2025-09-10 02:52:10.335801', 'step': 5941, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:52:10.388750', 'step': 5941, 'epoch': 3} +{'type': 'loss', 'content': 0.00015712468302808702, 'timestamp': '2025-09-10 02:52:10.390808', 'step': 5942, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:52:10.444093', 'step': 5942, 'epoch': 3} +{'type': 'loss', 'content': 0.009599327109754086, 'timestamp': '2025-09-10 02:52:10.450488', 'step': 5943, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:52:10.504262', 'step': 5943, 'epoch': 3} +{'type': 'loss', 'content': 7.971248851390556e-05, 'timestamp': '2025-09-10 02:52:10.510418', 'step': 5944, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:52:10.562856', 'step': 5944, 'epoch': 3} +{'type': 'loss', 'content': 0.0002541661378927529, 'timestamp': '2025-09-10 02:52:10.565007', 'step': 5945, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:52:10.618683', 'step': 5945, 'epoch': 3} +{'type': 'loss', 'content': 7.932009611977264e-05, 'timestamp': '2025-09-10 02:52:10.628325', 'step': 5946, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:52:10.681668', 'step': 5946, 'epoch': 3} +{'type': 'loss', 'content': 0.0011786666000261903, 'timestamp': '2025-09-10 02:52:10.683687', 'step': 5947, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:52:10.736058', 'step': 5947, 'epoch': 3} +{'type': 'loss', 'content': 0.0002877658698707819, 'timestamp': '2025-09-10 02:52:10.742013', 'step': 5948, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:52:10.794233', 'step': 5948, 'epoch': 3} +{'type': 'loss', 'content': 0.0009586106170900166, 'timestamp': '2025-09-10 02:52:10.796394', 'step': 5949, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:52:10.849752', 'step': 5949, 'epoch': 3} +{'type': 'loss', 'content': 0.00015108101069927216, 'timestamp': '2025-09-10 02:52:10.852091', 'step': 5950, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:52:10.905616', 'step': 5950, 'epoch': 3} +{'type': 'loss', 'content': 0.0005361175863072276, 'timestamp': '2025-09-10 02:52:10.911780', 'step': 5951, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:52:10.966166', 'step': 5951, 'epoch': 3} +{'type': 'loss', 'content': 0.0019740124698728323, 'timestamp': '2025-09-10 02:52:10.976576', 'step': 5952, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:52:11.030773', 'step': 5952, 'epoch': 3} +{'type': 'loss', 'content': 0.0002478157985024154, 'timestamp': '2025-09-10 02:52:11.038570', 'step': 5953, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:52:11.092652', 'step': 5953, 'epoch': 3} +{'type': 'loss', 'content': 4.527694545686245e-05, 'timestamp': '2025-09-10 02:52:11.095013', 'step': 5954, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:52:11.148521', 'step': 5954, 'epoch': 3} +{'type': 'loss', 'content': 0.0005762215587310493, 'timestamp': '2025-09-10 02:52:11.151148', 'step': 5955, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:52:11.204443', 'step': 5955, 'epoch': 3} +{'type': 'loss', 'content': 0.0024336830247193575, 'timestamp': '2025-09-10 02:52:11.210646', 'step': 5956, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:52:11.263810', 'step': 5956, 'epoch': 3} +{'type': 'loss', 'content': 0.02025148645043373, 'timestamp': '2025-09-10 02:52:11.265966', 'step': 5957, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:52:11.320089', 'step': 5957, 'epoch': 3} +{'type': 'loss', 'content': 0.025399362668395042, 'timestamp': '2025-09-10 02:52:11.322691', 'step': 5958, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:52:11.376963', 'step': 5958, 'epoch': 3} +{'type': 'loss', 'content': 0.0003289018932264298, 'timestamp': '2025-09-10 02:52:11.379232', 'step': 5959, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:52:11.432533', 'step': 5959, 'epoch': 3} +{'type': 'loss', 'content': 0.01882443018257618, 'timestamp': '2025-09-10 02:52:11.438943', 'step': 5960, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:52:11.492175', 'step': 5960, 'epoch': 3} +{'type': 'loss', 'content': 0.023095743730664253, 'timestamp': '2025-09-10 02:52:11.494214', 'step': 5961, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:52:11.548047', 'step': 5961, 'epoch': 3} +{'type': 'loss', 'content': 0.0015766258584335446, 'timestamp': '2025-09-10 02:52:11.550568', 'step': 5962, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:52:11.604030', 'step': 5962, 'epoch': 3} +{'type': 'loss', 'content': 0.06276575475931168, 'timestamp': '2025-09-10 02:52:11.606321', 'step': 5963, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-09-10 02:52:11.673075', 'step': 5963, 'epoch': 3} +{'type': 'loss', 'content': 8.02222202764824e-05, 'timestamp': '2025-09-10 02:52:11.686062', 'step': 5964, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:52:11.739255', 'step': 5964, 'epoch': 3} +{'type': 'loss', 'content': 0.0008802754455246031, 'timestamp': '2025-09-10 02:52:11.745086', 'step': 5965, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:52:11.799297', 'step': 5965, 'epoch': 3} +{'type': 'loss', 'content': 0.00012759133824147284, 'timestamp': '2025-09-10 02:52:11.801462', 'step': 5966, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:52:11.855357', 'step': 5966, 'epoch': 3} +{'type': 'loss', 'content': 0.03568331152200699, 'timestamp': '2025-09-10 02:52:11.864997', 'step': 5967, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:52:11.918734', 'step': 5967, 'epoch': 3} +{'type': 'loss', 'content': 0.021555619314312935, 'timestamp': '2025-09-10 02:52:11.925376', 'step': 5968, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:52:11.978247', 'step': 5968, 'epoch': 3} +{'type': 'loss', 'content': 0.0001967688003787771, 'timestamp': '2025-09-10 02:52:11.980602', 'step': 5969, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:52:12.033457', 'step': 5969, 'epoch': 3} +{'type': 'loss', 'content': 0.00020397835760377347, 'timestamp': '2025-09-10 02:52:12.035790', 'step': 5970, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:52:12.089352', 'step': 5970, 'epoch': 3} +{'type': 'loss', 'content': 0.0021871724165976048, 'timestamp': '2025-09-10 02:52:12.091909', 'step': 5971, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:52:12.145801', 'step': 5971, 'epoch': 3} +{'type': 'loss', 'content': 0.004574107937514782, 'timestamp': '2025-09-10 02:52:12.152128', 'step': 5972, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:52:12.205074', 'step': 5972, 'epoch': 3} +{'type': 'loss', 'content': 0.00011556023673620075, 'timestamp': '2025-09-10 02:52:12.207348', 'step': 5973, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:52:12.260684', 'step': 5973, 'epoch': 3} +{'type': 'loss', 'content': 0.012694244273006916, 'timestamp': '2025-09-10 02:52:12.263412', 'step': 5974, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:52:12.322239', 'step': 5974, 'epoch': 3} +{'type': 'loss', 'content': 0.0004663243016693741, 'timestamp': '2025-09-10 02:52:12.332688', 'step': 5975, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-09-10 02:52:12.401041', 'step': 5975, 'epoch': 3} +{'type': 'loss', 'content': 0.03924744203686714, 'timestamp': '2025-09-10 02:52:12.415190', 'step': 5976, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:52:12.468799', 'step': 5976, 'epoch': 3} +{'type': 'loss', 'content': 0.0029189076740294695, 'timestamp': '2025-09-10 02:52:12.476530', 'step': 5977, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:52:12.530058', 'step': 5977, 'epoch': 3} +{'type': 'loss', 'content': 0.002337169134989381, 'timestamp': '2025-09-10 02:52:12.532428', 'step': 5978, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:52:12.585744', 'step': 5978, 'epoch': 3} +{'type': 'loss', 'content': 0.005500641651451588, 'timestamp': '2025-09-10 02:52:12.587886', 'step': 5979, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:52:12.641443', 'step': 5979, 'epoch': 3} +{'type': 'loss', 'content': 0.004958459176123142, 'timestamp': '2025-09-10 02:52:12.649982', 'step': 5980, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:52:12.703633', 'step': 5980, 'epoch': 3} +{'type': 'loss', 'content': 0.0035466270055621862, 'timestamp': '2025-09-10 02:52:12.714099', 'step': 5981, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:52:12.767696', 'step': 5981, 'epoch': 3} +{'type': 'loss', 'content': 0.00018062000162899494, 'timestamp': '2025-09-10 02:52:12.773590', 'step': 5982, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:52:12.828086', 'step': 5982, 'epoch': 3} +{'type': 'loss', 'content': 0.0318772979080677, 'timestamp': '2025-09-10 02:52:12.837889', 'step': 5983, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:52:12.891557', 'step': 5983, 'epoch': 3} +{'type': 'loss', 'content': 0.0030142883770167828, 'timestamp': '2025-09-10 02:52:12.897803', 'step': 5984, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:52:12.950298', 'step': 5984, 'epoch': 3} +{'type': 'loss', 'content': 0.0011745187221094966, 'timestamp': '2025-09-10 02:52:12.958362', 'step': 5985, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 512], 'flops': 10240062230528.0}, 'timestamp': '2025-09-10 02:52:13.033789', 'step': 5985, 'epoch': 3} +{'type': 'loss', 'content': 0.000578440201934427, 'timestamp': '2025-09-10 02:52:13.047827', 'step': 5986, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-09-10 02:52:13.114802', 'step': 5986, 'epoch': 3} +{'type': 'loss', 'content': 0.0010196411749348044, 'timestamp': '2025-09-10 02:52:13.127017', 'step': 5987, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:52:13.181487', 'step': 5987, 'epoch': 3} +{'type': 'loss', 'content': 0.001312433509156108, 'timestamp': '2025-09-10 02:52:13.187971', 'step': 5988, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:52:13.241440', 'step': 5988, 'epoch': 3} +{'type': 'loss', 'content': 0.00018528975488152355, 'timestamp': '2025-09-10 02:52:13.243602', 'step': 5989, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:52:13.297418', 'step': 5989, 'epoch': 3} +{'type': 'loss', 'content': 0.0006053309189155698, 'timestamp': '2025-09-10 02:52:13.299695', 'step': 5990, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:52:13.353490', 'step': 5990, 'epoch': 3} +{'type': 'loss', 'content': 0.00031967618269845843, 'timestamp': '2025-09-10 02:52:13.355835', 'step': 5991, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:52:13.410304', 'step': 5991, 'epoch': 3} +{'type': 'loss', 'content': 0.006888638716191053, 'timestamp': '2025-09-10 02:52:13.418570', 'step': 5992, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:52:13.472136', 'step': 5992, 'epoch': 3} +{'type': 'loss', 'content': 0.0014289746759459376, 'timestamp': '2025-09-10 02:52:13.474448', 'step': 5993, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:52:13.529333', 'step': 5993, 'epoch': 3} +{'type': 'loss', 'content': 0.008395778015255928, 'timestamp': '2025-09-10 02:52:13.531902', 'step': 5994, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:52:13.585790', 'step': 5994, 'epoch': 3} +{'type': 'loss', 'content': 0.0003958672168664634, 'timestamp': '2025-09-10 02:52:13.588030', 'step': 5995, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:52:13.642165', 'step': 5995, 'epoch': 3} +{'type': 'loss', 'content': 0.0026239112485200167, 'timestamp': '2025-09-10 02:52:13.648636', 'step': 5996, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-09-10 02:52:13.708132', 'step': 5996, 'epoch': 3} +{'type': 'loss', 'content': 0.00046100522740744054, 'timestamp': '2025-09-10 02:52:13.719871', 'step': 5997, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:52:13.774716', 'step': 5997, 'epoch': 3} +{'type': 'loss', 'content': 0.02448379620909691, 'timestamp': '2025-09-10 02:52:13.777203', 'step': 5998, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:52:13.832896', 'step': 5998, 'epoch': 3} +{'type': 'loss', 'content': 0.0010140178492292762, 'timestamp': '2025-09-10 02:52:13.835196', 'step': 5999, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:52:13.889114', 'step': 5999, 'epoch': 3} +{'type': 'loss', 'content': 0.0031572221778333187, 'timestamp': '2025-09-10 02:52:13.899407', 'step': 6000, 'epoch': 3} +{'type': 'info', 'content': 'Checkpoint saved at step 6000', 'timestamp': '2025-09-10 02:52:14.404496', 'step': 6000, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:52:14.467266', 'step': 6000, 'epoch': 3} +{'type': 'loss', 'content': 0.00017229202785529196, 'timestamp': '2025-09-10 02:52:14.473338', 'step': 6001, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:52:14.540019', 'step': 6001, 'epoch': 3} +{'type': 'loss', 'content': 0.004986428655683994, 'timestamp': '2025-09-10 02:52:14.549806', 'step': 6002, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:52:14.617694', 'step': 6002, 'epoch': 3} +{'type': 'loss', 'content': 0.0002928458561655134, 'timestamp': '2025-09-10 02:52:14.628120', 'step': 6003, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:52:14.695981', 'step': 6003, 'epoch': 3} +{'type': 'loss', 'content': 0.00286271795630455, 'timestamp': '2025-09-10 02:52:14.703850', 'step': 6004, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:52:14.766304', 'step': 6004, 'epoch': 3} +{'type': 'loss', 'content': 0.0004062298103235662, 'timestamp': '2025-09-10 02:52:14.768662', 'step': 6005, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:52:14.837279', 'step': 6005, 'epoch': 3} +{'type': 'loss', 'content': 0.000835696526337415, 'timestamp': '2025-09-10 02:52:14.840448', 'step': 6006, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:52:14.905463', 'step': 6006, 'epoch': 3} +{'type': 'loss', 'content': 0.0014434696640819311, 'timestamp': '2025-09-10 02:52:14.911984', 'step': 6007, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:52:14.974790', 'step': 6007, 'epoch': 3} +{'type': 'loss', 'content': 0.001262914971448481, 'timestamp': '2025-09-10 02:52:14.983796', 'step': 6008, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:52:15.046599', 'step': 6008, 'epoch': 3} +{'type': 'loss', 'content': 0.0002870490134228021, 'timestamp': '2025-09-10 02:52:15.049247', 'step': 6009, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:52:15.120134', 'step': 6009, 'epoch': 3} +{'type': 'loss', 'content': 0.019455747678875923, 'timestamp': '2025-09-10 02:52:15.123237', 'step': 6010, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-09-10 02:52:15.193030', 'step': 6010, 'epoch': 3} +{'type': 'loss', 'content': 0.0009880803991109133, 'timestamp': '2025-09-10 02:52:15.203904', 'step': 6011, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:52:15.257809', 'step': 6011, 'epoch': 3} +{'type': 'loss', 'content': 0.0006967498338781297, 'timestamp': '2025-09-10 02:52:15.266504', 'step': 6012, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:52:15.320326', 'step': 6012, 'epoch': 3} +{'type': 'loss', 'content': 0.006020539440214634, 'timestamp': '2025-09-10 02:52:15.322726', 'step': 6013, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:52:15.375817', 'step': 6013, 'epoch': 3} +{'type': 'loss', 'content': 0.00047595458454452455, 'timestamp': '2025-09-10 02:52:15.377903', 'step': 6014, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:52:15.431920', 'step': 6014, 'epoch': 3} +{'type': 'loss', 'content': 0.002407153369858861, 'timestamp': '2025-09-10 02:52:15.441480', 'step': 6015, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 464], 'flops': 9280056402752.0}, 'timestamp': '2025-09-10 02:52:15.514101', 'step': 6015, 'epoch': 3} +{'type': 'loss', 'content': 0.002565657254308462, 'timestamp': '2025-09-10 02:52:15.528337', 'step': 6016, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:52:15.580579', 'step': 6016, 'epoch': 3} +{'type': 'loss', 'content': 0.006355735007673502, 'timestamp': '2025-09-10 02:52:15.582905', 'step': 6017, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:52:15.635325', 'step': 6017, 'epoch': 3} +{'type': 'loss', 'content': 0.0008355194586329162, 'timestamp': '2025-09-10 02:52:15.637474', 'step': 6018, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-09-10 02:52:15.705707', 'step': 6018, 'epoch': 3} +{'type': 'loss', 'content': 0.0021186231169849634, 'timestamp': '2025-09-10 02:52:15.718319', 'step': 6019, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:52:15.770813', 'step': 6019, 'epoch': 3} +{'type': 'loss', 'content': 0.0009638232295401394, 'timestamp': '2025-09-10 02:52:15.776854', 'step': 6020, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:52:15.830068', 'step': 6020, 'epoch': 3} +{'type': 'loss', 'content': 0.0010094494791701436, 'timestamp': '2025-09-10 02:52:15.836023', 'step': 6021, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:52:15.894317', 'step': 6021, 'epoch': 3} +{'type': 'loss', 'content': 0.0005834107869304717, 'timestamp': '2025-09-10 02:52:15.904730', 'step': 6022, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:52:15.958784', 'step': 6022, 'epoch': 3} +{'type': 'loss', 'content': 0.0023633132223039865, 'timestamp': '2025-09-10 02:52:15.961649', 'step': 6023, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:52:16.014832', 'step': 6023, 'epoch': 3} +{'type': 'loss', 'content': 0.0012404838344082236, 'timestamp': '2025-09-10 02:52:16.020849', 'step': 6024, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:52:16.073197', 'step': 6024, 'epoch': 3} +{'type': 'loss', 'content': 0.00593190360814333, 'timestamp': '2025-09-10 02:52:16.076246', 'step': 6025, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:52:16.129130', 'step': 6025, 'epoch': 3} +{'type': 'loss', 'content': 0.0059447018429636955, 'timestamp': '2025-09-10 02:52:16.135654', 'step': 6026, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:52:16.188272', 'step': 6026, 'epoch': 3} +{'type': 'loss', 'content': 0.0013655134243890643, 'timestamp': '2025-09-10 02:52:16.196537', 'step': 6027, 'epoch': 3} +{'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 736], 'batch_size': 8, 'flops': 14691612894976}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10859018244352}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 624], 'batch_size': 8, 'flops': 12455932682112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11497784019456}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8623338031488}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8623338031488}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10859018244352}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 656], 'batch_size': 8, 'flops': 13094698457216}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 560], 'batch_size': 8, 'flops': 11178401131904}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11497784019456}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 1168], 'batch_size': 8, 'flops': 23314950858880}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 640], 'batch_size': 8, 'flops': 12775315569664}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [6, 208], 'batch_size': 8, 'flops': 4151977605760}], 'timestamp': '2025-09-10 02:52:33.029849', 'step': 6027, 'epoch': 3} +{'type': 'pplx', 'content': 23118717.744525813, 'timestamp': '2025-09-10 02:52:33.034640', 'step': 6027, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:52:33.091655', 'step': 6027, 'epoch': 3} +{'type': 'loss', 'content': 0.0071800448931753635, 'timestamp': '2025-09-10 02:52:33.099957', 'step': 6028, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:52:33.161885', 'step': 6028, 'epoch': 3} +{'type': 'loss', 'content': 0.0048779817298054695, 'timestamp': '2025-09-10 02:52:33.163931', 'step': 6029, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:52:33.226906', 'step': 6029, 'epoch': 3} +{'type': 'loss', 'content': 0.0011815342586487532, 'timestamp': '2025-09-10 02:52:33.235156', 'step': 6030, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-09-10 02:52:33.296963', 'step': 6030, 'epoch': 3} +{'type': 'loss', 'content': 0.008699423633515835, 'timestamp': '2025-09-10 02:52:33.307821', 'step': 6031, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-09-10 02:52:33.376889', 'step': 6031, 'epoch': 3} +{'type': 'loss', 'content': 0.0011699561728164554, 'timestamp': '2025-09-10 02:52:33.390372', 'step': 6032, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:52:33.443969', 'step': 6032, 'epoch': 3} +{'type': 'loss', 'content': 0.0005398035864345729, 'timestamp': '2025-09-10 02:52:33.445942', 'step': 6033, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:52:33.500888', 'step': 6033, 'epoch': 3} +{'type': 'loss', 'content': 0.0015815923688933253, 'timestamp': '2025-09-10 02:52:33.510619', 'step': 6034, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:52:33.564357', 'step': 6034, 'epoch': 3} +{'type': 'loss', 'content': 0.007847676984965801, 'timestamp': '2025-09-10 02:52:33.566428', 'step': 6035, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:52:33.624295', 'step': 6035, 'epoch': 3} +{'type': 'loss', 'content': 0.0027494707610458136, 'timestamp': '2025-09-10 02:52:33.630433', 'step': 6036, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:52:33.683950', 'step': 6036, 'epoch': 3} +{'type': 'loss', 'content': 0.002524003619328141, 'timestamp': '2025-09-10 02:52:33.694448', 'step': 6037, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:52:33.752773', 'step': 6037, 'epoch': 3} +{'type': 'loss', 'content': 0.002844104077666998, 'timestamp': '2025-09-10 02:52:33.763186', 'step': 6038, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:52:33.819637', 'step': 6038, 'epoch': 3} +{'type': 'loss', 'content': 0.008749146945774555, 'timestamp': '2025-09-10 02:52:33.829227', 'step': 6039, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:52:33.883108', 'step': 6039, 'epoch': 3} +{'type': 'loss', 'content': 0.0008817720226943493, 'timestamp': '2025-09-10 02:52:33.892137', 'step': 6040, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:52:33.949163', 'step': 6040, 'epoch': 3} +{'type': 'loss', 'content': 0.0006424154853448272, 'timestamp': '2025-09-10 02:52:33.960270', 'step': 6041, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:52:34.013199', 'step': 6041, 'epoch': 3} +{'type': 'loss', 'content': 0.006086348555982113, 'timestamp': '2025-09-10 02:52:34.015916', 'step': 6042, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:52:34.069368', 'step': 6042, 'epoch': 3} +{'type': 'loss', 'content': 0.001611717278137803, 'timestamp': '2025-09-10 02:52:34.071485', 'step': 6043, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:52:34.126223', 'step': 6043, 'epoch': 3} +{'type': 'loss', 'content': 0.0015470280777662992, 'timestamp': '2025-09-10 02:52:34.136639', 'step': 6044, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:52:34.194054', 'step': 6044, 'epoch': 3} +{'type': 'loss', 'content': 0.002855368424206972, 'timestamp': '2025-09-10 02:52:34.205318', 'step': 6045, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:52:34.258058', 'step': 6045, 'epoch': 3} +{'type': 'loss', 'content': 0.0017798944609239697, 'timestamp': '2025-09-10 02:52:34.260366', 'step': 6046, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:52:34.313960', 'step': 6046, 'epoch': 3} +{'type': 'loss', 'content': 0.0015405418816953897, 'timestamp': '2025-09-10 02:52:34.323593', 'step': 6047, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:52:34.376921', 'step': 6047, 'epoch': 3} +{'type': 'loss', 'content': 0.010371079668402672, 'timestamp': '2025-09-10 02:52:34.386069', 'step': 6048, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:52:34.444595', 'step': 6048, 'epoch': 3} +{'type': 'loss', 'content': 0.0012742745457217097, 'timestamp': '2025-09-10 02:52:34.456164', 'step': 6049, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:52:34.514344', 'step': 6049, 'epoch': 3} +{'type': 'loss', 'content': 0.0007821349427103996, 'timestamp': '2025-09-10 02:52:34.524768', 'step': 6050, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:52:34.578624', 'step': 6050, 'epoch': 3} +{'type': 'loss', 'content': 0.004469706676900387, 'timestamp': '2025-09-10 02:52:34.581018', 'step': 6051, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:52:34.633914', 'step': 6051, 'epoch': 3} +{'type': 'loss', 'content': 0.0021879347041249275, 'timestamp': '2025-09-10 02:52:34.639943', 'step': 6052, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:52:34.692381', 'step': 6052, 'epoch': 3} +{'type': 'loss', 'content': 0.0008522382704541087, 'timestamp': '2025-09-10 02:52:34.694703', 'step': 6053, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:52:34.748527', 'step': 6053, 'epoch': 3} +{'type': 'loss', 'content': 0.0008947536698542535, 'timestamp': '2025-09-10 02:52:34.758122', 'step': 6054, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:52:34.811428', 'step': 6054, 'epoch': 3} +{'type': 'loss', 'content': 0.0014520528493449092, 'timestamp': '2025-09-10 02:52:34.817891', 'step': 6055, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:52:34.873736', 'step': 6055, 'epoch': 3} +{'type': 'loss', 'content': 0.00011400927178328857, 'timestamp': '2025-09-10 02:52:34.881001', 'step': 6056, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:52:34.933627', 'step': 6056, 'epoch': 3} +{'type': 'loss', 'content': 0.00031033085542730987, 'timestamp': '2025-09-10 02:52:34.935873', 'step': 6057, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:52:34.989128', 'step': 6057, 'epoch': 3} +{'type': 'loss', 'content': 0.0033830462489277124, 'timestamp': '2025-09-10 02:52:34.998760', 'step': 6058, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:52:35.051626', 'step': 6058, 'epoch': 3} +{'type': 'loss', 'content': 0.0006093700067140162, 'timestamp': '2025-09-10 02:52:35.053723', 'step': 6059, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:52:35.106339', 'step': 6059, 'epoch': 3} +{'type': 'loss', 'content': 0.0004717320844065398, 'timestamp': '2025-09-10 02:52:35.115245', 'step': 6060, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:52:35.167772', 'step': 6060, 'epoch': 3} +{'type': 'loss', 'content': 9.661592775955796e-05, 'timestamp': '2025-09-10 02:52:35.170011', 'step': 6061, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:52:35.222954', 'step': 6061, 'epoch': 3} +{'type': 'loss', 'content': 0.0013968811836093664, 'timestamp': '2025-09-10 02:52:35.225171', 'step': 6062, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:52:35.278304', 'step': 6062, 'epoch': 3} +{'type': 'loss', 'content': 8.858936780598015e-05, 'timestamp': '2025-09-10 02:52:35.284924', 'step': 6063, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:52:35.337706', 'step': 6063, 'epoch': 3} +{'type': 'loss', 'content': 0.001150972326286137, 'timestamp': '2025-09-10 02:52:35.343669', 'step': 6064, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:52:35.397131', 'step': 6064, 'epoch': 3} +{'type': 'loss', 'content': 0.000846659007947892, 'timestamp': '2025-09-10 02:52:35.407595', 'step': 6065, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-09-10 02:52:35.469264', 'step': 6065, 'epoch': 3} +{'type': 'loss', 'content': 0.001177606056444347, 'timestamp': '2025-09-10 02:52:35.480154', 'step': 6066, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:52:35.535306', 'step': 6066, 'epoch': 3} +{'type': 'loss', 'content': 0.000570034550037235, 'timestamp': '2025-09-10 02:52:35.545170', 'step': 6067, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:52:35.598010', 'step': 6067, 'epoch': 3} +{'type': 'loss', 'content': 0.00030715492903254926, 'timestamp': '2025-09-10 02:52:35.605428', 'step': 6068, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:52:35.657816', 'step': 6068, 'epoch': 3} +{'type': 'loss', 'content': 0.00012023419549223036, 'timestamp': '2025-09-10 02:52:35.659973', 'step': 6069, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:52:35.712542', 'step': 6069, 'epoch': 3} +{'type': 'loss', 'content': 0.0075134942308068275, 'timestamp': '2025-09-10 02:52:35.714654', 'step': 6070, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-09-10 02:52:35.774872', 'step': 6070, 'epoch': 3} +{'type': 'loss', 'content': 0.00048709503607824445, 'timestamp': '2025-09-10 02:52:35.785815', 'step': 6071, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:52:35.839061', 'step': 6071, 'epoch': 3} +{'type': 'loss', 'content': 3.048846156161744e-05, 'timestamp': '2025-09-10 02:52:35.845092', 'step': 6072, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:52:35.898252', 'step': 6072, 'epoch': 3} +{'type': 'loss', 'content': 0.000885853951331228, 'timestamp': '2025-09-10 02:52:35.908755', 'step': 6073, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:52:35.962978', 'step': 6073, 'epoch': 3} +{'type': 'loss', 'content': 0.0009545084903948009, 'timestamp': '2025-09-10 02:52:35.965336', 'step': 6074, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:52:36.018839', 'step': 6074, 'epoch': 3} +{'type': 'loss', 'content': 0.0015305677661672235, 'timestamp': '2025-09-10 02:52:36.025045', 'step': 6075, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:52:36.079706', 'step': 6075, 'epoch': 3} +{'type': 'loss', 'content': 0.011637846939265728, 'timestamp': '2025-09-10 02:52:36.090305', 'step': 6076, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:52:36.149404', 'step': 6076, 'epoch': 3} +{'type': 'loss', 'content': 0.0004362560866866261, 'timestamp': '2025-09-10 02:52:36.160887', 'step': 6077, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:52:36.214008', 'step': 6077, 'epoch': 3} +{'type': 'loss', 'content': 6.658708298346028e-05, 'timestamp': '2025-09-10 02:52:36.220357', 'step': 6078, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-09-10 02:52:36.281583', 'step': 6078, 'epoch': 3} +{'type': 'loss', 'content': 0.0001787340734153986, 'timestamp': '2025-09-10 02:52:36.292470', 'step': 6079, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:52:36.346165', 'step': 6079, 'epoch': 3} +{'type': 'loss', 'content': 0.000420929427491501, 'timestamp': '2025-09-10 02:52:36.352251', 'step': 6080, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:52:36.404865', 'step': 6080, 'epoch': 3} +{'type': 'loss', 'content': 0.0015851258067414165, 'timestamp': '2025-09-10 02:52:36.407167', 'step': 6081, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:52:36.459981', 'step': 6081, 'epoch': 3} +{'type': 'loss', 'content': 0.020229142159223557, 'timestamp': '2025-09-10 02:52:36.466589', 'step': 6082, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:52:36.520007', 'step': 6082, 'epoch': 3} +{'type': 'loss', 'content': 0.00032248589559458196, 'timestamp': '2025-09-10 02:52:36.522254', 'step': 6083, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:52:36.580299', 'step': 6083, 'epoch': 3} +{'type': 'loss', 'content': 0.0052845412865281105, 'timestamp': '2025-09-10 02:52:36.591538', 'step': 6084, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:52:36.644196', 'step': 6084, 'epoch': 3} +{'type': 'loss', 'content': 0.00017015948833432049, 'timestamp': '2025-09-10 02:52:36.650489', 'step': 6085, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:52:36.708430', 'step': 6085, 'epoch': 3} +{'type': 'loss', 'content': 0.02619732730090618, 'timestamp': '2025-09-10 02:52:36.718925', 'step': 6086, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:52:36.773419', 'step': 6086, 'epoch': 3} +{'type': 'loss', 'content': 4.551394158625044e-05, 'timestamp': '2025-09-10 02:52:36.775673', 'step': 6087, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:52:36.829168', 'step': 6087, 'epoch': 3} +{'type': 'loss', 'content': 0.0001303064200328663, 'timestamp': '2025-09-10 02:52:36.835538', 'step': 6088, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:52:36.888503', 'step': 6088, 'epoch': 3} +{'type': 'loss', 'content': 0.0016703010769560933, 'timestamp': '2025-09-10 02:52:36.891325', 'step': 6089, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:52:36.944701', 'step': 6089, 'epoch': 3} +{'type': 'loss', 'content': 0.005203016102313995, 'timestamp': '2025-09-10 02:52:36.952785', 'step': 6090, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:52:37.006194', 'step': 6090, 'epoch': 3} +{'type': 'loss', 'content': 8.93345622898778e-06, 'timestamp': '2025-09-10 02:52:37.008457', 'step': 6091, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:52:37.061654', 'step': 6091, 'epoch': 3} +{'type': 'loss', 'content': 0.0001508936838945374, 'timestamp': '2025-09-10 02:52:37.068932', 'step': 6092, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:52:37.121674', 'step': 6092, 'epoch': 3} +{'type': 'loss', 'content': 0.00010944458335870877, 'timestamp': '2025-09-10 02:52:37.124877', 'step': 6093, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 624], 'flops': 12480075828672.0}, 'timestamp': '2025-09-10 02:52:37.216274', 'step': 6093, 'epoch': 3} +{'type': 'loss', 'content': 0.003187777940183878, 'timestamp': '2025-09-10 02:52:37.233627', 'step': 6094, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:52:37.287810', 'step': 6094, 'epoch': 3} +{'type': 'loss', 'content': 0.00022569263819605112, 'timestamp': '2025-09-10 02:52:37.290166', 'step': 6095, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:52:37.343523', 'step': 6095, 'epoch': 3} +{'type': 'loss', 'content': 0.00011813950550276786, 'timestamp': '2025-09-10 02:52:37.349881', 'step': 6096, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 480], 'flops': 9600058345344.0}, 'timestamp': '2025-09-10 02:52:37.421815', 'step': 6096, 'epoch': 3} +{'type': 'loss', 'content': 0.0008416800992563367, 'timestamp': '2025-09-10 02:52:37.436731', 'step': 6097, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:52:37.491553', 'step': 6097, 'epoch': 3} +{'type': 'loss', 'content': 0.0006296943174675107, 'timestamp': '2025-09-10 02:52:37.501323', 'step': 6098, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:52:37.554742', 'step': 6098, 'epoch': 3} +{'type': 'loss', 'content': 4.272789738024585e-05, 'timestamp': '2025-09-10 02:52:37.557080', 'step': 6099, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:52:37.610619', 'step': 6099, 'epoch': 3} +{'type': 'loss', 'content': 0.0005077628302387893, 'timestamp': '2025-09-10 02:52:37.622175', 'step': 6100, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:52:37.683574', 'step': 6100, 'epoch': 3} +{'type': 'loss', 'content': 0.00012766268628183752, 'timestamp': '2025-09-10 02:52:37.691599', 'step': 6101, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:52:37.750681', 'step': 6101, 'epoch': 3} +{'type': 'loss', 'content': 0.0014411035226657987, 'timestamp': '2025-09-10 02:52:37.758605', 'step': 6102, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:52:37.813848', 'step': 6102, 'epoch': 3} +{'type': 'loss', 'content': 0.021063178777694702, 'timestamp': '2025-09-10 02:52:37.821372', 'step': 6103, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:52:37.877240', 'step': 6103, 'epoch': 3} +{'type': 'loss', 'content': 0.0005084304721094668, 'timestamp': '2025-09-10 02:52:37.888506', 'step': 6104, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:52:37.944235', 'step': 6104, 'epoch': 3} +{'type': 'loss', 'content': 0.00013107885024510324, 'timestamp': '2025-09-10 02:52:37.948948', 'step': 6105, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:52:38.002955', 'step': 6105, 'epoch': 3} +{'type': 'loss', 'content': 0.000524270290043205, 'timestamp': '2025-09-10 02:52:38.011025', 'step': 6106, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:52:38.071444', 'step': 6106, 'epoch': 3} +{'type': 'loss', 'content': 7.484802335966378e-05, 'timestamp': '2025-09-10 02:52:38.077778', 'step': 6107, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:52:38.138325', 'step': 6107, 'epoch': 3} +{'type': 'loss', 'content': 0.0005922123673371971, 'timestamp': '2025-09-10 02:52:38.149608', 'step': 6108, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-09-10 02:52:38.219329', 'step': 6108, 'epoch': 3} +{'type': 'loss', 'content': 0.00012583202624227852, 'timestamp': '2025-09-10 02:52:38.233053', 'step': 6109, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:52:38.296866', 'step': 6109, 'epoch': 3} +{'type': 'loss', 'content': 0.0004160685057286173, 'timestamp': '2025-09-10 02:52:38.303387', 'step': 6110, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:52:38.361019', 'step': 6110, 'epoch': 3} +{'type': 'loss', 'content': 5.073786451248452e-05, 'timestamp': '2025-09-10 02:52:38.369975', 'step': 6111, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:52:38.427110', 'step': 6111, 'epoch': 3} +{'type': 'loss', 'content': 0.006909678224474192, 'timestamp': '2025-09-10 02:52:38.441625', 'step': 6112, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:52:38.498147', 'step': 6112, 'epoch': 3} +{'type': 'loss', 'content': 0.00013595778727903962, 'timestamp': '2025-09-10 02:52:38.503891', 'step': 6113, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:52:38.566533', 'step': 6113, 'epoch': 3} +{'type': 'loss', 'content': 0.00021880402346141636, 'timestamp': '2025-09-10 02:52:38.568890', 'step': 6114, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:52:38.622010', 'step': 6114, 'epoch': 3} +{'type': 'loss', 'content': 6.544974894495681e-05, 'timestamp': '2025-09-10 02:52:38.624398', 'step': 6115, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:52:38.677335', 'step': 6115, 'epoch': 3} +{'type': 'loss', 'content': 0.00015257038467098027, 'timestamp': '2025-09-10 02:52:38.683295', 'step': 6116, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:52:38.735885', 'step': 6116, 'epoch': 3} +{'type': 'loss', 'content': 0.00020383484661579132, 'timestamp': '2025-09-10 02:52:38.742417', 'step': 6117, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:52:38.795880', 'step': 6117, 'epoch': 3} +{'type': 'loss', 'content': 0.005315456073731184, 'timestamp': '2025-09-10 02:52:38.797802', 'step': 6118, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 608], 'flops': 12160073886080.0}, 'timestamp': '2025-09-10 02:52:38.887778', 'step': 6118, 'epoch': 3} +{'type': 'loss', 'content': 0.006385329179465771, 'timestamp': '2025-09-10 02:52:38.904884', 'step': 6119, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:52:38.958727', 'step': 6119, 'epoch': 3} +{'type': 'loss', 'content': 0.0001608455495443195, 'timestamp': '2025-09-10 02:52:38.964591', 'step': 6120, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:52:39.016779', 'step': 6120, 'epoch': 3} +{'type': 'loss', 'content': 0.000821257010102272, 'timestamp': '2025-09-10 02:52:39.019025', 'step': 6121, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:52:39.071908', 'step': 6121, 'epoch': 3} +{'type': 'loss', 'content': 0.0008781217620708048, 'timestamp': '2025-09-10 02:52:39.074294', 'step': 6122, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:52:39.127230', 'step': 6122, 'epoch': 3} +{'type': 'loss', 'content': 0.06590049713850021, 'timestamp': '2025-09-10 02:52:39.129374', 'step': 6123, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:52:39.181928', 'step': 6123, 'epoch': 3} +{'type': 'loss', 'content': 0.000752792228013277, 'timestamp': '2025-09-10 02:52:39.187886', 'step': 6124, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:52:39.240005', 'step': 6124, 'epoch': 3} +{'type': 'loss', 'content': 0.0007797020371071994, 'timestamp': '2025-09-10 02:52:39.243124', 'step': 6125, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:52:39.295849', 'step': 6125, 'epoch': 3} +{'type': 'loss', 'content': 0.019136367365717888, 'timestamp': '2025-09-10 02:52:39.297999', 'step': 6126, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:52:39.350506', 'step': 6126, 'epoch': 3} +{'type': 'loss', 'content': 0.003150348784402013, 'timestamp': '2025-09-10 02:52:39.352990', 'step': 6127, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:52:39.405640', 'step': 6127, 'epoch': 3} +{'type': 'loss', 'content': 9.519495506538078e-05, 'timestamp': '2025-09-10 02:52:39.411555', 'step': 6128, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:52:39.463716', 'step': 6128, 'epoch': 3} +{'type': 'loss', 'content': 0.00013283970474731177, 'timestamp': '2025-09-10 02:52:39.466819', 'step': 6129, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:52:39.520533', 'step': 6129, 'epoch': 3} +{'type': 'loss', 'content': 0.00014158491103444248, 'timestamp': '2025-09-10 02:52:39.530120', 'step': 6130, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:52:39.582677', 'step': 6130, 'epoch': 3} +{'type': 'loss', 'content': 0.0024900168646126986, 'timestamp': '2025-09-10 02:52:39.584971', 'step': 6131, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:52:39.637806', 'step': 6131, 'epoch': 3} +{'type': 'loss', 'content': 0.0032622383441776037, 'timestamp': '2025-09-10 02:52:39.643743', 'step': 6132, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:52:39.696496', 'step': 6132, 'epoch': 3} +{'type': 'loss', 'content': 0.0009876827243715525, 'timestamp': '2025-09-10 02:52:39.704817', 'step': 6133, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:52:39.758051', 'step': 6133, 'epoch': 3} +{'type': 'loss', 'content': 0.007712595164775848, 'timestamp': '2025-09-10 02:52:39.760395', 'step': 6134, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:52:39.813640', 'step': 6134, 'epoch': 3} +{'type': 'loss', 'content': 0.00016517074254807085, 'timestamp': '2025-09-10 02:52:39.816522', 'step': 6135, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:52:39.870908', 'step': 6135, 'epoch': 3} +{'type': 'loss', 'content': 0.0018742047250270844, 'timestamp': '2025-09-10 02:52:39.877045', 'step': 6136, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:52:39.929047', 'step': 6136, 'epoch': 3} +{'type': 'loss', 'content': 0.007885963656008244, 'timestamp': '2025-09-10 02:52:39.931751', 'step': 6137, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:52:39.984069', 'step': 6137, 'epoch': 3} +{'type': 'loss', 'content': 0.0021347992587834597, 'timestamp': '2025-09-10 02:52:39.986332', 'step': 6138, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:52:40.038624', 'step': 6138, 'epoch': 3} +{'type': 'loss', 'content': 2.2866826839162968e-05, 'timestamp': '2025-09-10 02:52:40.040959', 'step': 6139, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:52:40.093467', 'step': 6139, 'epoch': 3} +{'type': 'loss', 'content': 0.012924431823194027, 'timestamp': '2025-09-10 02:52:40.099380', 'step': 6140, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:52:40.151493', 'step': 6140, 'epoch': 3} +{'type': 'loss', 'content': 0.0019553520251065493, 'timestamp': '2025-09-10 02:52:40.161783', 'step': 6141, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:52:40.214871', 'step': 6141, 'epoch': 3} +{'type': 'loss', 'content': 0.0001370320824207738, 'timestamp': '2025-09-10 02:52:40.217711', 'step': 6142, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:52:40.270040', 'step': 6142, 'epoch': 3} +{'type': 'loss', 'content': 0.04433509334921837, 'timestamp': '2025-09-10 02:52:40.272115', 'step': 6143, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:52:40.324490', 'step': 6143, 'epoch': 3} +{'type': 'loss', 'content': 0.002048466121777892, 'timestamp': '2025-09-10 02:52:40.330441', 'step': 6144, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 656], 'flops': 13120079713856.0}, 'timestamp': '2025-09-10 02:52:40.425013', 'step': 6144, 'epoch': 3} +{'type': 'loss', 'content': 0.0015178490430116653, 'timestamp': '2025-09-10 02:52:40.445400', 'step': 6145, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:52:40.499151', 'step': 6145, 'epoch': 3} +{'type': 'loss', 'content': 0.003375701140612364, 'timestamp': '2025-09-10 02:52:40.501437', 'step': 6146, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:52:40.554507', 'step': 6146, 'epoch': 3} +{'type': 'loss', 'content': 0.0025541919749230146, 'timestamp': '2025-09-10 02:52:40.556869', 'step': 6147, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:52:40.609949', 'step': 6147, 'epoch': 3} +{'type': 'loss', 'content': 0.01113084889948368, 'timestamp': '2025-09-10 02:52:40.615844', 'step': 6148, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:52:40.667782', 'step': 6148, 'epoch': 3} +{'type': 'loss', 'content': 0.0007157630170695484, 'timestamp': '2025-09-10 02:52:40.676089', 'step': 6149, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:52:40.730330', 'step': 6149, 'epoch': 3} +{'type': 'loss', 'content': 0.001018314273096621, 'timestamp': '2025-09-10 02:52:40.736073', 'step': 6150, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:52:40.789959', 'step': 6150, 'epoch': 3} +{'type': 'loss', 'content': 0.005166871007531881, 'timestamp': '2025-09-10 02:52:40.792412', 'step': 6151, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:52:40.845983', 'step': 6151, 'epoch': 3} +{'type': 'loss', 'content': 0.00041923296521417797, 'timestamp': '2025-09-10 02:52:40.852033', 'step': 6152, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:52:40.904415', 'step': 6152, 'epoch': 3} +{'type': 'loss', 'content': 0.0003797454119194299, 'timestamp': '2025-09-10 02:52:40.907826', 'step': 6153, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:52:40.963443', 'step': 6153, 'epoch': 3} +{'type': 'loss', 'content': 0.00042816068162210286, 'timestamp': '2025-09-10 02:52:40.969984', 'step': 6154, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:52:41.023183', 'step': 6154, 'epoch': 3} +{'type': 'loss', 'content': 0.00011409942817408592, 'timestamp': '2025-09-10 02:52:41.031330', 'step': 6155, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:52:41.084030', 'step': 6155, 'epoch': 3} +{'type': 'loss', 'content': 0.0017229224322363734, 'timestamp': '2025-09-10 02:52:41.089742', 'step': 6156, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:52:41.142200', 'step': 6156, 'epoch': 3} +{'type': 'loss', 'content': 0.0062713404186069965, 'timestamp': '2025-09-10 02:52:41.144209', 'step': 6157, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:52:41.196937', 'step': 6157, 'epoch': 3} +{'type': 'loss', 'content': 0.0001340866001555696, 'timestamp': '2025-09-10 02:52:41.199862', 'step': 6158, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:52:41.253181', 'step': 6158, 'epoch': 3} +{'type': 'loss', 'content': 5.393969331635162e-05, 'timestamp': '2025-09-10 02:52:41.255202', 'step': 6159, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:52:41.308061', 'step': 6159, 'epoch': 3} +{'type': 'loss', 'content': 9.729629528010264e-05, 'timestamp': '2025-09-10 02:52:41.313673', 'step': 6160, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:52:41.366465', 'step': 6160, 'epoch': 3} +{'type': 'loss', 'content': 0.0006829385529272258, 'timestamp': '2025-09-10 02:52:41.372808', 'step': 6161, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-09-10 02:52:41.441058', 'step': 6161, 'epoch': 3} +{'type': 'loss', 'content': 0.0006158491596579552, 'timestamp': '2025-09-10 02:52:41.453641', 'step': 6162, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:52:41.508485', 'step': 6162, 'epoch': 3} +{'type': 'loss', 'content': 0.00023279429296962917, 'timestamp': '2025-09-10 02:52:41.518297', 'step': 6163, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:52:41.571240', 'step': 6163, 'epoch': 3} +{'type': 'loss', 'content': 8.347532275365666e-05, 'timestamp': '2025-09-10 02:52:41.578531', 'step': 6164, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:52:41.630714', 'step': 6164, 'epoch': 3} +{'type': 'loss', 'content': 2.7901072826352902e-05, 'timestamp': '2025-09-10 02:52:41.633032', 'step': 6165, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:52:41.686104', 'step': 6165, 'epoch': 3} +{'type': 'loss', 'content': 0.012794139795005322, 'timestamp': '2025-09-10 02:52:41.691960', 'step': 6166, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:52:41.745007', 'step': 6166, 'epoch': 3} +{'type': 'loss', 'content': 0.001425377675332129, 'timestamp': '2025-09-10 02:52:41.747110', 'step': 6167, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:52:41.801138', 'step': 6167, 'epoch': 3} +{'type': 'loss', 'content': 0.0006850698264315724, 'timestamp': '2025-09-10 02:52:41.811718', 'step': 6168, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:52:41.863998', 'step': 6168, 'epoch': 3} +{'type': 'loss', 'content': 0.0008233123226091266, 'timestamp': '2025-09-10 02:52:41.872276', 'step': 6169, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:52:41.925347', 'step': 6169, 'epoch': 3} +{'type': 'loss', 'content': 0.0009910735534504056, 'timestamp': '2025-09-10 02:52:41.927443', 'step': 6170, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:52:41.980059', 'step': 6170, 'epoch': 3} +{'type': 'loss', 'content': 9.992994455387816e-05, 'timestamp': '2025-09-10 02:52:41.988358', 'step': 6171, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:52:42.041808', 'step': 6171, 'epoch': 3} +{'type': 'loss', 'content': 0.00030609042732976377, 'timestamp': '2025-09-10 02:52:42.047515', 'step': 6172, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:52:42.099885', 'step': 6172, 'epoch': 3} +{'type': 'loss', 'content': 0.0014566489262506366, 'timestamp': '2025-09-10 02:52:42.101949', 'step': 6173, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:52:42.154572', 'step': 6173, 'epoch': 3} +{'type': 'loss', 'content': 0.04792408272624016, 'timestamp': '2025-09-10 02:52:42.156780', 'step': 6174, 'epoch': 3} +{'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 736], 'batch_size': 8, 'flops': 14691612894976}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10859018244352}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 624], 'batch_size': 8, 'flops': 12455932682112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11497784019456}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8623338031488}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8623338031488}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10859018244352}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 656], 'batch_size': 8, 'flops': 13094698457216}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 560], 'batch_size': 8, 'flops': 11178401131904}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11497784019456}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 1168], 'batch_size': 8, 'flops': 23314950858880}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 640], 'batch_size': 8, 'flops': 12775315569664}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [6, 208], 'batch_size': 8, 'flops': 4151977605760}], 'timestamp': '2025-09-10 02:52:58.917402', 'step': 6174, 'epoch': 3} +{'type': 'pplx', 'content': 23046639.44568271, 'timestamp': '2025-09-10 02:52:58.920389', 'step': 6174, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:52:58.975360', 'step': 6174, 'epoch': 3} +{'type': 'loss', 'content': 4.4728592911269516e-05, 'timestamp': '2025-09-10 02:52:58.977422', 'step': 6175, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:52:59.031208', 'step': 6175, 'epoch': 3} +{'type': 'loss', 'content': 0.04376506805419922, 'timestamp': '2025-09-10 02:52:59.037573', 'step': 6176, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:52:59.090501', 'step': 6176, 'epoch': 3} +{'type': 'loss', 'content': 0.014188756234943867, 'timestamp': '2025-09-10 02:52:59.098370', 'step': 6177, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:52:59.151951', 'step': 6177, 'epoch': 3} +{'type': 'loss', 'content': 0.001620362512767315, 'timestamp': '2025-09-10 02:52:59.154136', 'step': 6178, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:52:59.207337', 'step': 6178, 'epoch': 3} +{'type': 'loss', 'content': 0.014629093930125237, 'timestamp': '2025-09-10 02:52:59.209550', 'step': 6179, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 480], 'flops': 9600058345344.0}, 'timestamp': '2025-09-10 02:52:59.283324', 'step': 6179, 'epoch': 3} +{'type': 'loss', 'content': 0.013443955220282078, 'timestamp': '2025-09-10 02:52:59.297689', 'step': 6180, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-09-10 02:52:59.363087', 'step': 6180, 'epoch': 3} +{'type': 'loss', 'content': 0.00608267355710268, 'timestamp': '2025-09-10 02:52:59.376293', 'step': 6181, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:52:59.431820', 'step': 6181, 'epoch': 3} +{'type': 'loss', 'content': 0.0004723257734440267, 'timestamp': '2025-09-10 02:52:59.433872', 'step': 6182, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:52:59.488613', 'step': 6182, 'epoch': 3} +{'type': 'loss', 'content': 0.003553885966539383, 'timestamp': '2025-09-10 02:52:59.498409', 'step': 6183, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 464], 'flops': 9280056402752.0}, 'timestamp': '2025-09-10 02:52:59.570970', 'step': 6183, 'epoch': 3} +{'type': 'loss', 'content': 0.00032707207719795406, 'timestamp': '2025-09-10 02:52:59.585171', 'step': 6184, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:52:59.638281', 'step': 6184, 'epoch': 3} +{'type': 'loss', 'content': 0.0009438624256290495, 'timestamp': '2025-09-10 02:52:59.640664', 'step': 6185, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 448], 'flops': 8960054460160.0}, 'timestamp': '2025-09-10 02:52:59.710178', 'step': 6185, 'epoch': 3} +{'type': 'loss', 'content': 0.00011728469689842314, 'timestamp': '2025-09-10 02:52:59.723074', 'step': 6186, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:52:59.775687', 'step': 6186, 'epoch': 3} +{'type': 'loss', 'content': 8.939744293456897e-05, 'timestamp': '2025-09-10 02:52:59.777850', 'step': 6187, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:52:59.830557', 'step': 6187, 'epoch': 3} +{'type': 'loss', 'content': 0.003583366284146905, 'timestamp': '2025-09-10 02:52:59.839431', 'step': 6188, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:52:59.892898', 'step': 6188, 'epoch': 3} +{'type': 'loss', 'content': 0.0005416512722149491, 'timestamp': '2025-09-10 02:52:59.903411', 'step': 6189, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:52:59.956642', 'step': 6189, 'epoch': 3} +{'type': 'loss', 'content': 0.003713883925229311, 'timestamp': '2025-09-10 02:52:59.966238', 'step': 6190, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:53:00.019785', 'step': 6190, 'epoch': 3} +{'type': 'loss', 'content': 0.0013095579342916608, 'timestamp': '2025-09-10 02:53:00.022060', 'step': 6191, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:53:00.075655', 'step': 6191, 'epoch': 3} +{'type': 'loss', 'content': 0.0002630103554110974, 'timestamp': '2025-09-10 02:53:00.086062', 'step': 6192, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:53:00.138766', 'step': 6192, 'epoch': 3} +{'type': 'loss', 'content': 7.038398325676098e-05, 'timestamp': '2025-09-10 02:53:00.141078', 'step': 6193, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:53:00.195668', 'step': 6193, 'epoch': 3} +{'type': 'loss', 'content': 4.434372385730967e-05, 'timestamp': '2025-09-10 02:53:00.204408', 'step': 6194, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:53:00.259315', 'step': 6194, 'epoch': 3} +{'type': 'loss', 'content': 0.0019118929049000144, 'timestamp': '2025-09-10 02:53:00.266362', 'step': 6195, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:53:00.320393', 'step': 6195, 'epoch': 3} +{'type': 'loss', 'content': 0.00012861691357102245, 'timestamp': '2025-09-10 02:53:00.328667', 'step': 6196, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:53:00.381785', 'step': 6196, 'epoch': 3} +{'type': 'loss', 'content': 0.00030544851324521005, 'timestamp': '2025-09-10 02:53:00.392245', 'step': 6197, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:53:00.445490', 'step': 6197, 'epoch': 3} +{'type': 'loss', 'content': 1.9226237782277167e-05, 'timestamp': '2025-09-10 02:53:00.447579', 'step': 6198, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:53:00.500104', 'step': 6198, 'epoch': 3} +{'type': 'loss', 'content': 0.0008024958078749478, 'timestamp': '2025-09-10 02:53:00.502260', 'step': 6199, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:53:00.554976', 'step': 6199, 'epoch': 3} +{'type': 'loss', 'content': 0.0034939402248710394, 'timestamp': '2025-09-10 02:53:00.560709', 'step': 6200, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:53:00.613012', 'step': 6200, 'epoch': 3} +{'type': 'loss', 'content': 5.811767186969519e-05, 'timestamp': '2025-09-10 02:53:00.615179', 'step': 6201, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:53:00.667571', 'step': 6201, 'epoch': 3} +{'type': 'loss', 'content': 0.001254458911716938, 'timestamp': '2025-09-10 02:53:00.669881', 'step': 6202, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:53:00.722231', 'step': 6202, 'epoch': 3} +{'type': 'loss', 'content': 0.0003831215144600719, 'timestamp': '2025-09-10 02:53:00.724446', 'step': 6203, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-09-10 02:53:00.785840', 'step': 6203, 'epoch': 3} +{'type': 'loss', 'content': 0.00031643040711060166, 'timestamp': '2025-09-10 02:53:00.797682', 'step': 6204, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:53:00.849974', 'step': 6204, 'epoch': 3} +{'type': 'loss', 'content': 0.00032409251434728503, 'timestamp': '2025-09-10 02:53:00.852215', 'step': 6205, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:53:00.905670', 'step': 6205, 'epoch': 3} +{'type': 'loss', 'content': 0.0001230636116815731, 'timestamp': '2025-09-10 02:53:00.908127', 'step': 6206, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:53:00.963175', 'step': 6206, 'epoch': 3} +{'type': 'loss', 'content': 0.0018245227402076125, 'timestamp': '2025-09-10 02:53:00.970219', 'step': 6207, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:53:01.024014', 'step': 6207, 'epoch': 3} +{'type': 'loss', 'content': 0.0019986736588180065, 'timestamp': '2025-09-10 02:53:01.034881', 'step': 6208, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:53:01.106681', 'step': 6208, 'epoch': 3} +{'type': 'loss', 'content': 0.011568238958716393, 'timestamp': '2025-09-10 02:53:01.108790', 'step': 6209, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:53:01.161380', 'step': 6209, 'epoch': 3} +{'type': 'loss', 'content': 0.0005507374298758805, 'timestamp': '2025-09-10 02:53:01.167824', 'step': 6210, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:53:01.221538', 'step': 6210, 'epoch': 3} +{'type': 'loss', 'content': 0.00015149227692745626, 'timestamp': '2025-09-10 02:53:01.228806', 'step': 6211, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:53:01.282112', 'step': 6211, 'epoch': 3} +{'type': 'loss', 'content': 0.0010124669643118978, 'timestamp': '2025-09-10 02:53:01.287997', 'step': 6212, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:53:01.339946', 'step': 6212, 'epoch': 3} +{'type': 'loss', 'content': 0.0008196182898245752, 'timestamp': '2025-09-10 02:53:01.342211', 'step': 6213, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:53:01.395159', 'step': 6213, 'epoch': 3} +{'type': 'loss', 'content': 0.0002756024769041687, 'timestamp': '2025-09-10 02:53:01.397477', 'step': 6214, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:53:01.450322', 'step': 6214, 'epoch': 3} +{'type': 'loss', 'content': 0.0004791795217897743, 'timestamp': '2025-09-10 02:53:01.452531', 'step': 6215, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:53:01.505607', 'step': 6215, 'epoch': 3} +{'type': 'loss', 'content': 0.0031861786264926195, 'timestamp': '2025-09-10 02:53:01.512718', 'step': 6216, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:53:01.568430', 'step': 6216, 'epoch': 3} +{'type': 'loss', 'content': 0.0003523991326801479, 'timestamp': '2025-09-10 02:53:01.571664', 'step': 6217, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 480], 'flops': 9600058345344.0}, 'timestamp': '2025-09-10 02:53:01.650088', 'step': 6217, 'epoch': 3} +{'type': 'loss', 'content': 0.0004075986216776073, 'timestamp': '2025-09-10 02:53:01.663784', 'step': 6218, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:53:01.717659', 'step': 6218, 'epoch': 3} +{'type': 'loss', 'content': 0.006375530268996954, 'timestamp': '2025-09-10 02:53:01.720051', 'step': 6219, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:53:01.773376', 'step': 6219, 'epoch': 3} +{'type': 'loss', 'content': 0.011905818246304989, 'timestamp': '2025-09-10 02:53:01.783459', 'step': 6220, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:53:01.839031', 'step': 6220, 'epoch': 3} +{'type': 'loss', 'content': 0.0004494747263379395, 'timestamp': '2025-09-10 02:53:01.850885', 'step': 6221, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-09-10 02:53:01.928899', 'step': 6221, 'epoch': 3} +{'type': 'loss', 'content': 0.0004530021105892956, 'timestamp': '2025-09-10 02:53:01.941599', 'step': 6222, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:53:01.995143', 'step': 6222, 'epoch': 3} +{'type': 'loss', 'content': 0.00012486240302678198, 'timestamp': '2025-09-10 02:53:02.002780', 'step': 6223, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:53:02.055840', 'step': 6223, 'epoch': 3} +{'type': 'loss', 'content': 7.643323624506593e-05, 'timestamp': '2025-09-10 02:53:02.062247', 'step': 6224, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:53:02.114696', 'step': 6224, 'epoch': 3} +{'type': 'loss', 'content': 0.0001208492394653149, 'timestamp': '2025-09-10 02:53:02.116949', 'step': 6225, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:53:02.170164', 'step': 6225, 'epoch': 3} +{'type': 'loss', 'content': 0.0007338562281802297, 'timestamp': '2025-09-10 02:53:02.172395', 'step': 6226, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:53:02.225545', 'step': 6226, 'epoch': 3} +{'type': 'loss', 'content': 0.000566408911254257, 'timestamp': '2025-09-10 02:53:02.227698', 'step': 6227, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:53:02.280600', 'step': 6227, 'epoch': 3} +{'type': 'loss', 'content': 6.674916949123144e-05, 'timestamp': '2025-09-10 02:53:02.286642', 'step': 6228, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:53:02.339094', 'step': 6228, 'epoch': 3} +{'type': 'loss', 'content': 1.1840201295854058e-05, 'timestamp': '2025-09-10 02:53:02.341216', 'step': 6229, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:53:02.394610', 'step': 6229, 'epoch': 3} +{'type': 'loss', 'content': 0.003923589829355478, 'timestamp': '2025-09-10 02:53:02.404153', 'step': 6230, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:53:02.457174', 'step': 6230, 'epoch': 3} +{'type': 'loss', 'content': 0.002102156635373831, 'timestamp': '2025-09-10 02:53:02.459212', 'step': 6231, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:53:02.512156', 'step': 6231, 'epoch': 3} +{'type': 'loss', 'content': 0.001509518246166408, 'timestamp': '2025-09-10 02:53:02.519341', 'step': 6232, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:53:02.572362', 'step': 6232, 'epoch': 3} +{'type': 'loss', 'content': 0.0005916491500101984, 'timestamp': '2025-09-10 02:53:02.582913', 'step': 6233, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:53:02.636330', 'step': 6233, 'epoch': 3} +{'type': 'loss', 'content': 0.00018602547061163932, 'timestamp': '2025-09-10 02:53:02.638529', 'step': 6234, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:53:02.691303', 'step': 6234, 'epoch': 3} +{'type': 'loss', 'content': 0.0012755133211612701, 'timestamp': '2025-09-10 02:53:02.694084', 'step': 6235, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:53:02.746816', 'step': 6235, 'epoch': 3} +{'type': 'loss', 'content': 0.001339509035460651, 'timestamp': '2025-09-10 02:53:02.752995', 'step': 6236, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:53:02.805209', 'step': 6236, 'epoch': 3} +{'type': 'loss', 'content': 0.0006676294142380357, 'timestamp': '2025-09-10 02:53:02.807389', 'step': 6237, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:53:02.860172', 'step': 6237, 'epoch': 3} +{'type': 'loss', 'content': 0.0004006302042398602, 'timestamp': '2025-09-10 02:53:02.862501', 'step': 6238, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:53:02.914776', 'step': 6238, 'epoch': 3} +{'type': 'loss', 'content': 0.0003464436740614474, 'timestamp': '2025-09-10 02:53:02.917875', 'step': 6239, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:53:02.969826', 'step': 6239, 'epoch': 3} +{'type': 'loss', 'content': 5.9572183090494946e-05, 'timestamp': '2025-09-10 02:53:02.975666', 'step': 6240, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-09-10 02:53:03.041667', 'step': 6240, 'epoch': 3} +{'type': 'loss', 'content': 0.0002842635731212795, 'timestamp': '2025-09-10 02:53:03.055304', 'step': 6241, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:53:03.109434', 'step': 6241, 'epoch': 3} +{'type': 'loss', 'content': 0.00018574048590380698, 'timestamp': '2025-09-10 02:53:03.119077', 'step': 6242, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:53:03.172206', 'step': 6242, 'epoch': 3} +{'type': 'loss', 'content': 0.00011604089377215132, 'timestamp': '2025-09-10 02:53:03.174686', 'step': 6243, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:53:03.227367', 'step': 6243, 'epoch': 3} +{'type': 'loss', 'content': 0.0001450964919058606, 'timestamp': '2025-09-10 02:53:03.233449', 'step': 6244, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:53:03.285304', 'step': 6244, 'epoch': 3} +{'type': 'loss', 'content': 0.00018526332860346884, 'timestamp': '2025-09-10 02:53:03.293507', 'step': 6245, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:53:03.346214', 'step': 6245, 'epoch': 3} +{'type': 'loss', 'content': 0.0004118310462217778, 'timestamp': '2025-09-10 02:53:03.348356', 'step': 6246, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 480], 'flops': 9600058345344.0}, 'timestamp': '2025-09-10 02:53:03.421194', 'step': 6246, 'epoch': 3} +{'type': 'loss', 'content': 0.007664974313229322, 'timestamp': '2025-09-10 02:53:03.434895', 'step': 6247, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:53:03.488222', 'step': 6247, 'epoch': 3} +{'type': 'loss', 'content': 0.0003324569552205503, 'timestamp': '2025-09-10 02:53:03.495376', 'step': 6248, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:53:03.548297', 'step': 6248, 'epoch': 3} +{'type': 'loss', 'content': 0.00048176193377003074, 'timestamp': '2025-09-10 02:53:03.550739', 'step': 6249, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:53:03.603778', 'step': 6249, 'epoch': 3} +{'type': 'loss', 'content': 0.0005551331560127437, 'timestamp': '2025-09-10 02:53:03.605936', 'step': 6250, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:53:03.658680', 'step': 6250, 'epoch': 3} +{'type': 'loss', 'content': 5.801698353025131e-05, 'timestamp': '2025-09-10 02:53:03.661033', 'step': 6251, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:53:03.713967', 'step': 6251, 'epoch': 3} +{'type': 'loss', 'content': 0.0002592813689261675, 'timestamp': '2025-09-10 02:53:03.719856', 'step': 6252, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:53:03.772900', 'step': 6252, 'epoch': 3} +{'type': 'loss', 'content': 1.2752126167470124e-05, 'timestamp': '2025-09-10 02:53:03.779066', 'step': 6253, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:53:03.831613', 'step': 6253, 'epoch': 3} +{'type': 'loss', 'content': 0.01028306595981121, 'timestamp': '2025-09-10 02:53:03.834685', 'step': 6254, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:53:03.887555', 'step': 6254, 'epoch': 3} +{'type': 'loss', 'content': 0.00025878133601509035, 'timestamp': '2025-09-10 02:53:03.889693', 'step': 6255, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:53:03.942123', 'step': 6255, 'epoch': 3} +{'type': 'loss', 'content': 0.0040932828560471535, 'timestamp': '2025-09-10 02:53:03.951090', 'step': 6256, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:53:04.003196', 'step': 6256, 'epoch': 3} +{'type': 'loss', 'content': 6.455898255808279e-05, 'timestamp': '2025-09-10 02:53:04.005309', 'step': 6257, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:53:04.057743', 'step': 6257, 'epoch': 3} +{'type': 'loss', 'content': 4.057732076034881e-05, 'timestamp': '2025-09-10 02:53:04.064247', 'step': 6258, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:53:04.123765', 'step': 6258, 'epoch': 3} +{'type': 'loss', 'content': 0.00015628700202796608, 'timestamp': '2025-09-10 02:53:04.134435', 'step': 6259, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:53:04.186888', 'step': 6259, 'epoch': 3} +{'type': 'loss', 'content': 0.0005173964309506118, 'timestamp': '2025-09-10 02:53:04.192542', 'step': 6260, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:53:04.244198', 'step': 6260, 'epoch': 3} +{'type': 'loss', 'content': 5.169626820133999e-05, 'timestamp': '2025-09-10 02:53:04.246429', 'step': 6261, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:53:04.298955', 'step': 6261, 'epoch': 3} +{'type': 'loss', 'content': 0.0009875959949567914, 'timestamp': '2025-09-10 02:53:04.305529', 'step': 6262, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:53:04.358412', 'step': 6262, 'epoch': 3} +{'type': 'loss', 'content': 0.0002259409084217623, 'timestamp': '2025-09-10 02:53:04.361274', 'step': 6263, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:53:04.414309', 'step': 6263, 'epoch': 3} +{'type': 'loss', 'content': 0.00031470516114495695, 'timestamp': '2025-09-10 02:53:04.419886', 'step': 6264, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:53:04.472004', 'step': 6264, 'epoch': 3} +{'type': 'loss', 'content': 0.0017533283680677414, 'timestamp': '2025-09-10 02:53:04.480395', 'step': 6265, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:53:04.532855', 'step': 6265, 'epoch': 3} +{'type': 'loss', 'content': 0.000321357452776283, 'timestamp': '2025-09-10 02:53:04.535149', 'step': 6266, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:53:04.588209', 'step': 6266, 'epoch': 3} +{'type': 'loss', 'content': 0.00523083982989192, 'timestamp': '2025-09-10 02:53:04.590480', 'step': 6267, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:53:04.642916', 'step': 6267, 'epoch': 3} +{'type': 'loss', 'content': 0.00013569157454185188, 'timestamp': '2025-09-10 02:53:04.648568', 'step': 6268, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:53:04.700953', 'step': 6268, 'epoch': 3} +{'type': 'loss', 'content': 0.00020350891281850636, 'timestamp': '2025-09-10 02:53:04.702936', 'step': 6269, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:53:04.757287', 'step': 6269, 'epoch': 3} +{'type': 'loss', 'content': 0.0001151436663349159, 'timestamp': '2025-09-10 02:53:04.767099', 'step': 6270, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:53:04.819819', 'step': 6270, 'epoch': 3} +{'type': 'loss', 'content': 0.023529188707470894, 'timestamp': '2025-09-10 02:53:04.822871', 'step': 6271, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:53:04.875785', 'step': 6271, 'epoch': 3} +{'type': 'loss', 'content': 0.0004282921727281064, 'timestamp': '2025-09-10 02:53:04.881725', 'step': 6272, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:53:04.933531', 'step': 6272, 'epoch': 3} +{'type': 'loss', 'content': 0.0014876218046993017, 'timestamp': '2025-09-10 02:53:04.936929', 'step': 6273, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:53:04.991898', 'step': 6273, 'epoch': 3} +{'type': 'loss', 'content': 0.00011225293565075845, 'timestamp': '2025-09-10 02:53:04.994084', 'step': 6274, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:53:05.047754', 'step': 6274, 'epoch': 3} +{'type': 'loss', 'content': 0.00041951602906920016, 'timestamp': '2025-09-10 02:53:05.057366', 'step': 6275, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:53:05.111449', 'step': 6275, 'epoch': 3} +{'type': 'loss', 'content': 4.044321394758299e-05, 'timestamp': '2025-09-10 02:53:05.122026', 'step': 6276, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:53:05.174318', 'step': 6276, 'epoch': 3} +{'type': 'loss', 'content': 0.00010085922986036167, 'timestamp': '2025-09-10 02:53:05.176543', 'step': 6277, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:53:05.238544', 'step': 6277, 'epoch': 3} +{'type': 'loss', 'content': 0.0008098947000689805, 'timestamp': '2025-09-10 02:53:05.240764', 'step': 6278, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:53:05.293203', 'step': 6278, 'epoch': 3} +{'type': 'loss', 'content': 9.599256736692041e-05, 'timestamp': '2025-09-10 02:53:05.295465', 'step': 6279, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:53:05.347769', 'step': 6279, 'epoch': 3} +{'type': 'loss', 'content': 0.0563594289124012, 'timestamp': '2025-09-10 02:53:05.353446', 'step': 6280, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:53:05.405618', 'step': 6280, 'epoch': 3} +{'type': 'loss', 'content': 0.0002962738217320293, 'timestamp': '2025-09-10 02:53:05.413830', 'step': 6281, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:53:05.465885', 'step': 6281, 'epoch': 3} +{'type': 'loss', 'content': 0.0004825623764190823, 'timestamp': '2025-09-10 02:53:05.468861', 'step': 6282, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:53:05.527686', 'step': 6282, 'epoch': 3} +{'type': 'loss', 'content': 0.00011310545960441232, 'timestamp': '2025-09-10 02:53:05.538120', 'step': 6283, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:53:05.591285', 'step': 6283, 'epoch': 3} +{'type': 'loss', 'content': 0.0003833844675682485, 'timestamp': '2025-09-10 02:53:05.596925', 'step': 6284, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:53:05.649436', 'step': 6284, 'epoch': 3} +{'type': 'loss', 'content': 0.0005850231973454356, 'timestamp': '2025-09-10 02:53:05.652407', 'step': 6285, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:53:05.712310', 'step': 6285, 'epoch': 3} +{'type': 'loss', 'content': 4.5168409997131675e-05, 'timestamp': '2025-09-10 02:53:05.723059', 'step': 6286, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:53:05.777002', 'step': 6286, 'epoch': 3} +{'type': 'loss', 'content': 8.450110908597708e-05, 'timestamp': '2025-09-10 02:53:05.786562', 'step': 6287, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:53:05.838940', 'step': 6287, 'epoch': 3} +{'type': 'loss', 'content': 0.00016273361688945442, 'timestamp': '2025-09-10 02:53:05.844711', 'step': 6288, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:53:05.897167', 'step': 6288, 'epoch': 3} +{'type': 'loss', 'content': 0.0005180092412047088, 'timestamp': '2025-09-10 02:53:05.899303', 'step': 6289, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:53:05.952277', 'step': 6289, 'epoch': 3} +{'type': 'loss', 'content': 0.0006385315791703761, 'timestamp': '2025-09-10 02:53:05.954673', 'step': 6290, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:53:06.007065', 'step': 6290, 'epoch': 3} +{'type': 'loss', 'content': 0.0004625410947483033, 'timestamp': '2025-09-10 02:53:06.009201', 'step': 6291, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:53:06.062111', 'step': 6291, 'epoch': 3} +{'type': 'loss', 'content': 0.00010765146726043895, 'timestamp': '2025-09-10 02:53:06.068031', 'step': 6292, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:53:06.119909', 'step': 6292, 'epoch': 3} +{'type': 'loss', 'content': 7.746909250272438e-05, 'timestamp': '2025-09-10 02:53:06.122161', 'step': 6293, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:53:06.175118', 'step': 6293, 'epoch': 3} +{'type': 'loss', 'content': 0.010182967409491539, 'timestamp': '2025-09-10 02:53:06.177405', 'step': 6294, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:53:06.230576', 'step': 6294, 'epoch': 3} +{'type': 'loss', 'content': 0.0006712442263960838, 'timestamp': '2025-09-10 02:53:06.232795', 'step': 6295, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:53:06.285477', 'step': 6295, 'epoch': 3} +{'type': 'loss', 'content': 4.2704712541308254e-05, 'timestamp': '2025-09-10 02:53:06.291072', 'step': 6296, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:53:06.343390', 'step': 6296, 'epoch': 3} +{'type': 'loss', 'content': 0.0001639856054680422, 'timestamp': '2025-09-10 02:53:06.353560', 'step': 6297, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:53:06.407937', 'step': 6297, 'epoch': 3} +{'type': 'loss', 'content': 0.0001479656930314377, 'timestamp': '2025-09-10 02:53:06.410341', 'step': 6298, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:53:06.463333', 'step': 6298, 'epoch': 3} +{'type': 'loss', 'content': 0.004781940020620823, 'timestamp': '2025-09-10 02:53:06.471491', 'step': 6299, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:53:06.526139', 'step': 6299, 'epoch': 3} +{'type': 'loss', 'content': 0.00015522715693805367, 'timestamp': '2025-09-10 02:53:06.536718', 'step': 6300, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:53:06.588779', 'step': 6300, 'epoch': 3} +{'type': 'loss', 'content': 1.8653279767022468e-05, 'timestamp': '2025-09-10 02:53:06.590965', 'step': 6301, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:53:06.644052', 'step': 6301, 'epoch': 3} +{'type': 'loss', 'content': 7.126219861675054e-05, 'timestamp': '2025-09-10 02:53:06.650663', 'step': 6302, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:53:06.703848', 'step': 6302, 'epoch': 3} +{'type': 'loss', 'content': 2.5612262106733397e-05, 'timestamp': '2025-09-10 02:53:06.705910', 'step': 6303, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:53:06.758360', 'step': 6303, 'epoch': 3} +{'type': 'loss', 'content': 0.0001806333748390898, 'timestamp': '2025-09-10 02:53:06.767425', 'step': 6304, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:53:06.820217', 'step': 6304, 'epoch': 3} +{'type': 'loss', 'content': 0.0003336466324981302, 'timestamp': '2025-09-10 02:53:06.826672', 'step': 6305, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:53:06.879599', 'step': 6305, 'epoch': 3} +{'type': 'loss', 'content': 0.00016676145605742931, 'timestamp': '2025-09-10 02:53:06.882670', 'step': 6306, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:06.934997', 'step': 6306, 'epoch': 3} +{'type': 'loss', 'content': 6.678514182567596e-05, 'timestamp': '2025-09-10 02:53:06.936930', 'step': 6307, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:53:06.990430', 'step': 6307, 'epoch': 3} +{'type': 'loss', 'content': 0.0012330238241702318, 'timestamp': '2025-09-10 02:53:07.000810', 'step': 6308, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:53:07.053197', 'step': 6308, 'epoch': 3} +{'type': 'loss', 'content': 0.0002929836919065565, 'timestamp': '2025-09-10 02:53:07.055382', 'step': 6309, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:53:07.107926', 'step': 6309, 'epoch': 3} +{'type': 'loss', 'content': 1.920703834912274e-05, 'timestamp': '2025-09-10 02:53:07.109969', 'step': 6310, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:53:07.162517', 'step': 6310, 'epoch': 3} +{'type': 'loss', 'content': 0.000312409974867478, 'timestamp': '2025-09-10 02:53:07.169109', 'step': 6311, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:53:07.221887', 'step': 6311, 'epoch': 3} +{'type': 'loss', 'content': 8.205670019378886e-05, 'timestamp': '2025-09-10 02:53:07.229275', 'step': 6312, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:53:07.281961', 'step': 6312, 'epoch': 3} +{'type': 'loss', 'content': 4.2824809497687966e-05, 'timestamp': '2025-09-10 02:53:07.284138', 'step': 6313, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:53:07.341893', 'step': 6313, 'epoch': 3} +{'type': 'loss', 'content': 1.1424997865105979e-05, 'timestamp': '2025-09-10 02:53:07.352294', 'step': 6314, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:53:07.405279', 'step': 6314, 'epoch': 3} +{'type': 'loss', 'content': 0.000568813644349575, 'timestamp': '2025-09-10 02:53:07.407585', 'step': 6315, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:53:07.461177', 'step': 6315, 'epoch': 3} +{'type': 'loss', 'content': 0.0018300736555829644, 'timestamp': '2025-09-10 02:53:07.471584', 'step': 6316, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:53:07.523811', 'step': 6316, 'epoch': 3} +{'type': 'loss', 'content': 0.0003173965960741043, 'timestamp': '2025-09-10 02:53:07.526998', 'step': 6317, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:53:07.581613', 'step': 6317, 'epoch': 3} +{'type': 'loss', 'content': 0.005713038146495819, 'timestamp': '2025-09-10 02:53:07.591375', 'step': 6318, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:53:07.643924', 'step': 6318, 'epoch': 3} +{'type': 'loss', 'content': 0.00014197020209394395, 'timestamp': '2025-09-10 02:53:07.646081', 'step': 6319, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:53:07.698785', 'step': 6319, 'epoch': 3} +{'type': 'loss', 'content': 0.000491217419039458, 'timestamp': '2025-09-10 02:53:07.704615', 'step': 6320, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:53:07.757127', 'step': 6320, 'epoch': 3} +{'type': 'loss', 'content': 0.001874455832876265, 'timestamp': '2025-09-10 02:53:07.759410', 'step': 6321, 'epoch': 3} +{'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 736], 'batch_size': 8, 'flops': 14691612894976}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10859018244352}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 624], 'batch_size': 8, 'flops': 12455932682112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11497784019456}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8623338031488}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8623338031488}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10859018244352}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 656], 'batch_size': 8, 'flops': 13094698457216}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 560], 'batch_size': 8, 'flops': 11178401131904}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11497784019456}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 1168], 'batch_size': 8, 'flops': 23314950858880}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 640], 'batch_size': 8, 'flops': 12775315569664}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [6, 208], 'batch_size': 8, 'flops': 4151977605760}], 'timestamp': '2025-09-10 02:53:24.635358', 'step': 6321, 'epoch': 3} +{'type': 'pplx', 'content': 25623746.72768853, 'timestamp': '2025-09-10 02:53:24.640638', 'step': 6321, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:53:24.697537', 'step': 6321, 'epoch': 3} +{'type': 'loss', 'content': 0.0002497486711945385, 'timestamp': '2025-09-10 02:53:24.700228', 'step': 6322, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:53:24.755203', 'step': 6322, 'epoch': 3} +{'type': 'loss', 'content': 0.008116251789033413, 'timestamp': '2025-09-10 02:53:24.757226', 'step': 6323, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:53:24.810884', 'step': 6323, 'epoch': 3} +{'type': 'loss', 'content': 0.0002907492744270712, 'timestamp': '2025-09-10 02:53:24.817011', 'step': 6324, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:53:24.873564', 'step': 6324, 'epoch': 3} +{'type': 'loss', 'content': 1.2124964086979162e-05, 'timestamp': '2025-09-10 02:53:24.877496', 'step': 6325, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:53:24.936907', 'step': 6325, 'epoch': 3} +{'type': 'loss', 'content': 0.0004172473563812673, 'timestamp': '2025-09-10 02:53:24.944113', 'step': 6326, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:53:25.009368', 'step': 6326, 'epoch': 3} +{'type': 'loss', 'content': 0.0001568460138514638, 'timestamp': '2025-09-10 02:53:25.020078', 'step': 6327, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-09-10 02:53:25.093776', 'step': 6327, 'epoch': 3} +{'type': 'loss', 'content': 0.0008111385977827013, 'timestamp': '2025-09-10 02:53:25.107126', 'step': 6328, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:53:25.160566', 'step': 6328, 'epoch': 3} +{'type': 'loss', 'content': 0.007448896765708923, 'timestamp': '2025-09-10 02:53:25.169677', 'step': 6329, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:53:25.230643', 'step': 6329, 'epoch': 3} +{'type': 'loss', 'content': 0.007777568884193897, 'timestamp': '2025-09-10 02:53:25.237213', 'step': 6330, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:53:25.297291', 'step': 6330, 'epoch': 3} +{'type': 'loss', 'content': 0.0008014828781597316, 'timestamp': '2025-09-10 02:53:25.299330', 'step': 6331, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:53:25.352642', 'step': 6331, 'epoch': 3} +{'type': 'loss', 'content': 0.0011677221627905965, 'timestamp': '2025-09-10 02:53:25.358366', 'step': 6332, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:53:25.412505', 'step': 6332, 'epoch': 3} +{'type': 'loss', 'content': 4.402061676955782e-05, 'timestamp': '2025-09-10 02:53:25.422514', 'step': 6333, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:53:25.476141', 'step': 6333, 'epoch': 3} +{'type': 'loss', 'content': 0.0009365716832689941, 'timestamp': '2025-09-10 02:53:25.478152', 'step': 6334, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:53:25.533989', 'step': 6334, 'epoch': 3} +{'type': 'loss', 'content': 0.0011481484398245811, 'timestamp': '2025-09-10 02:53:25.536188', 'step': 6335, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:53:25.590892', 'step': 6335, 'epoch': 3} +{'type': 'loss', 'content': 0.004527546465396881, 'timestamp': '2025-09-10 02:53:25.601474', 'step': 6336, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:53:25.654330', 'step': 6336, 'epoch': 3} +{'type': 'loss', 'content': 0.004685709718614817, 'timestamp': '2025-09-10 02:53:25.657150', 'step': 6337, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:53:25.709620', 'step': 6337, 'epoch': 3} +{'type': 'loss', 'content': 6.837741238996387e-05, 'timestamp': '2025-09-10 02:53:25.712721', 'step': 6338, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:53:25.771977', 'step': 6338, 'epoch': 3} +{'type': 'loss', 'content': 9.559287718730047e-05, 'timestamp': '2025-09-10 02:53:25.773975', 'step': 6339, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:53:25.834825', 'step': 6339, 'epoch': 3} +{'type': 'loss', 'content': 0.00011402172822272405, 'timestamp': '2025-09-10 02:53:25.845211', 'step': 6340, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:53:25.897487', 'step': 6340, 'epoch': 3} +{'type': 'loss', 'content': 6.443385063903406e-05, 'timestamp': '2025-09-10 02:53:25.900114', 'step': 6341, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:53:25.957717', 'step': 6341, 'epoch': 3} +{'type': 'loss', 'content': 0.0013485607923939824, 'timestamp': '2025-09-10 02:53:25.959881', 'step': 6342, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:53:26.012811', 'step': 6342, 'epoch': 3} +{'type': 'loss', 'content': 0.00014597535482607782, 'timestamp': '2025-09-10 02:53:26.014930', 'step': 6343, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:53:26.067972', 'step': 6343, 'epoch': 3} +{'type': 'loss', 'content': 5.369337304728106e-05, 'timestamp': '2025-09-10 02:53:26.075075', 'step': 6344, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:53:26.142656', 'step': 6344, 'epoch': 3} +{'type': 'loss', 'content': 0.00011516348604345694, 'timestamp': '2025-09-10 02:53:26.144965', 'step': 6345, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:53:26.198266', 'step': 6345, 'epoch': 3} +{'type': 'loss', 'content': 0.0037112042773514986, 'timestamp': '2025-09-10 02:53:26.204679', 'step': 6346, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:53:26.263888', 'step': 6346, 'epoch': 3} +{'type': 'loss', 'content': 0.006334986537694931, 'timestamp': '2025-09-10 02:53:26.266074', 'step': 6347, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:53:26.320396', 'step': 6347, 'epoch': 3} +{'type': 'loss', 'content': 0.00020315279834903777, 'timestamp': '2025-09-10 02:53:26.327730', 'step': 6348, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:53:26.380702', 'step': 6348, 'epoch': 3} +{'type': 'loss', 'content': 0.004242732655256987, 'timestamp': '2025-09-10 02:53:26.382852', 'step': 6349, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:53:26.435996', 'step': 6349, 'epoch': 3} +{'type': 'loss', 'content': 4.986769272363745e-05, 'timestamp': '2025-09-10 02:53:26.439838', 'step': 6350, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:53:26.495395', 'step': 6350, 'epoch': 3} +{'type': 'loss', 'content': 2.9768902095383964e-05, 'timestamp': '2025-09-10 02:53:26.498188', 'step': 6351, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:53:26.551085', 'step': 6351, 'epoch': 3} +{'type': 'loss', 'content': 0.03486521542072296, 'timestamp': '2025-09-10 02:53:26.557009', 'step': 6352, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:53:26.609023', 'step': 6352, 'epoch': 3} +{'type': 'loss', 'content': 0.04174187034368515, 'timestamp': '2025-09-10 02:53:26.611012', 'step': 6353, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:53:26.663773', 'step': 6353, 'epoch': 3} +{'type': 'loss', 'content': 3.407457552384585e-05, 'timestamp': '2025-09-10 02:53:26.666597', 'step': 6354, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:53:26.719374', 'step': 6354, 'epoch': 3} +{'type': 'loss', 'content': 0.00010306506737833843, 'timestamp': '2025-09-10 02:53:26.725986', 'step': 6355, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:53:26.779139', 'step': 6355, 'epoch': 3} +{'type': 'loss', 'content': 7.052934506646125e-06, 'timestamp': '2025-09-10 02:53:26.784981', 'step': 6356, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:53:26.837355', 'step': 6356, 'epoch': 3} +{'type': 'loss', 'content': 0.00034755567321553826, 'timestamp': '2025-09-10 02:53:26.847498', 'step': 6357, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:53:26.901942', 'step': 6357, 'epoch': 3} +{'type': 'loss', 'content': 0.0005519238184206188, 'timestamp': '2025-09-10 02:53:26.904167', 'step': 6358, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:53:26.957952', 'step': 6358, 'epoch': 3} +{'type': 'loss', 'content': 0.0001442223001504317, 'timestamp': '2025-09-10 02:53:26.960120', 'step': 6359, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:53:27.013551', 'step': 6359, 'epoch': 3} +{'type': 'loss', 'content': 0.0010180289391428232, 'timestamp': '2025-09-10 02:53:27.019719', 'step': 6360, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:53:27.072664', 'step': 6360, 'epoch': 3} +{'type': 'loss', 'content': 4.5201082684798166e-05, 'timestamp': '2025-09-10 02:53:27.075135', 'step': 6361, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:53:27.135535', 'step': 6361, 'epoch': 3} +{'type': 'loss', 'content': 0.0007746697519905865, 'timestamp': '2025-09-10 02:53:27.146216', 'step': 6362, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:53:27.199097', 'step': 6362, 'epoch': 3} +{'type': 'loss', 'content': 0.001340774237178266, 'timestamp': '2025-09-10 02:53:27.201454', 'step': 6363, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:53:27.254708', 'step': 6363, 'epoch': 3} +{'type': 'loss', 'content': 0.010940327309072018, 'timestamp': '2025-09-10 02:53:27.260583', 'step': 6364, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:53:27.312905', 'step': 6364, 'epoch': 3} +{'type': 'loss', 'content': 0.00010309758363291621, 'timestamp': '2025-09-10 02:53:27.315026', 'step': 6365, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:53:27.368257', 'step': 6365, 'epoch': 3} +{'type': 'loss', 'content': 0.00046365702291950583, 'timestamp': '2025-09-10 02:53:27.370284', 'step': 6366, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-09-10 02:53:27.437055', 'step': 6366, 'epoch': 3} +{'type': 'loss', 'content': 0.00015972986875567585, 'timestamp': '2025-09-10 02:53:27.449287', 'step': 6367, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:53:27.503891', 'step': 6367, 'epoch': 3} +{'type': 'loss', 'content': 4.685276508098468e-05, 'timestamp': '2025-09-10 02:53:27.510120', 'step': 6368, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:53:27.563084', 'step': 6368, 'epoch': 3} +{'type': 'loss', 'content': 0.005840769503265619, 'timestamp': '2025-09-10 02:53:27.565305', 'step': 6369, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:53:27.618784', 'step': 6369, 'epoch': 3} +{'type': 'loss', 'content': 0.0017197491833940148, 'timestamp': '2025-09-10 02:53:27.620963', 'step': 6370, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-09-10 02:53:27.687177', 'step': 6370, 'epoch': 3} +{'type': 'loss', 'content': 0.002981850178912282, 'timestamp': '2025-09-10 02:53:27.699372', 'step': 6371, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 464], 'flops': 9280056402752.0}, 'timestamp': '2025-09-10 02:53:27.772787', 'step': 6371, 'epoch': 3} +{'type': 'loss', 'content': 0.0013454955769702792, 'timestamp': '2025-09-10 02:53:27.787064', 'step': 6372, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:53:27.839441', 'step': 6372, 'epoch': 3} +{'type': 'loss', 'content': 0.0002641402243170887, 'timestamp': '2025-09-10 02:53:27.841856', 'step': 6373, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:53:27.894835', 'step': 6373, 'epoch': 3} +{'type': 'loss', 'content': 0.00016312948719132692, 'timestamp': '2025-09-10 02:53:27.897001', 'step': 6374, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:53:27.951591', 'step': 6374, 'epoch': 3} +{'type': 'loss', 'content': 3.924271368305199e-05, 'timestamp': '2025-09-10 02:53:27.961414', 'step': 6375, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:53:28.016772', 'step': 6375, 'epoch': 3} +{'type': 'loss', 'content': 8.935371442930773e-05, 'timestamp': '2025-09-10 02:53:28.027356', 'step': 6376, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 496], 'flops': 9920060287936.0}, 'timestamp': '2025-09-10 02:53:28.099814', 'step': 6376, 'epoch': 3} +{'type': 'loss', 'content': 0.05012155696749687, 'timestamp': '2025-09-10 02:53:28.114986', 'step': 6377, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:53:28.169273', 'step': 6377, 'epoch': 3} +{'type': 'loss', 'content': 0.00021866110910195857, 'timestamp': '2025-09-10 02:53:28.179067', 'step': 6378, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:53:28.232060', 'step': 6378, 'epoch': 3} +{'type': 'loss', 'content': 0.0001106784911826253, 'timestamp': '2025-09-10 02:53:28.234420', 'step': 6379, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:53:28.287853', 'step': 6379, 'epoch': 3} +{'type': 'loss', 'content': 0.00042406810098327696, 'timestamp': '2025-09-10 02:53:28.295534', 'step': 6380, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:53:28.347633', 'step': 6380, 'epoch': 3} +{'type': 'loss', 'content': 0.03518061712384224, 'timestamp': '2025-09-10 02:53:28.349983', 'step': 6381, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:53:28.403934', 'step': 6381, 'epoch': 3} +{'type': 'loss', 'content': 0.00012866409088019282, 'timestamp': '2025-09-10 02:53:28.413555', 'step': 6382, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:53:28.467180', 'step': 6382, 'epoch': 3} +{'type': 'loss', 'content': 0.0006021680892445147, 'timestamp': '2025-09-10 02:53:28.475339', 'step': 6383, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:53:28.528635', 'step': 6383, 'epoch': 3} +{'type': 'loss', 'content': 0.0162067711353302, 'timestamp': '2025-09-10 02:53:28.534324', 'step': 6384, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:53:28.586867', 'step': 6384, 'epoch': 3} +{'type': 'loss', 'content': 0.0058546424843370914, 'timestamp': '2025-09-10 02:53:28.588984', 'step': 6385, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:53:28.641703', 'step': 6385, 'epoch': 3} +{'type': 'loss', 'content': 1.4167162589728832e-05, 'timestamp': '2025-09-10 02:53:28.644861', 'step': 6386, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:53:28.697981', 'step': 6386, 'epoch': 3} +{'type': 'loss', 'content': 6.715995550621301e-05, 'timestamp': '2025-09-10 02:53:28.700252', 'step': 6387, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:53:28.753283', 'step': 6387, 'epoch': 3} +{'type': 'loss', 'content': 2.2044809156795964e-05, 'timestamp': '2025-09-10 02:53:28.760741', 'step': 6388, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:53:28.813041', 'step': 6388, 'epoch': 3} +{'type': 'loss', 'content': 4.692918810178526e-05, 'timestamp': '2025-09-10 02:53:28.815466', 'step': 6389, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:53:28.869528', 'step': 6389, 'epoch': 3} +{'type': 'loss', 'content': 0.005919909570366144, 'timestamp': '2025-09-10 02:53:28.879132', 'step': 6390, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:53:28.932292', 'step': 6390, 'epoch': 3} +{'type': 'loss', 'content': 0.0008166728657670319, 'timestamp': '2025-09-10 02:53:28.934496', 'step': 6391, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:53:28.987320', 'step': 6391, 'epoch': 3} +{'type': 'loss', 'content': 3.9261107303900644e-05, 'timestamp': '2025-09-10 02:53:28.993042', 'step': 6392, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:53:29.046072', 'step': 6392, 'epoch': 3} +{'type': 'loss', 'content': 0.0018909344216808677, 'timestamp': '2025-09-10 02:53:29.054344', 'step': 6393, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-09-10 02:53:29.123712', 'step': 6393, 'epoch': 3} +{'type': 'loss', 'content': 7.440822810167447e-05, 'timestamp': '2025-09-10 02:53:29.136390', 'step': 6394, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:53:29.189592', 'step': 6394, 'epoch': 3} +{'type': 'loss', 'content': 0.00010338453284930438, 'timestamp': '2025-09-10 02:53:29.191832', 'step': 6395, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-09-10 02:53:29.253495', 'step': 6395, 'epoch': 3} +{'type': 'loss', 'content': 0.0003287374565843493, 'timestamp': '2025-09-10 02:53:29.265360', 'step': 6396, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:53:29.317765', 'step': 6396, 'epoch': 3} +{'type': 'loss', 'content': 0.0018978974549099803, 'timestamp': '2025-09-10 02:53:29.319847', 'step': 6397, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-09-10 02:53:29.380904', 'step': 6397, 'epoch': 3} +{'type': 'loss', 'content': 4.915746467304416e-05, 'timestamp': '2025-09-10 02:53:29.391849', 'step': 6398, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:53:29.445234', 'step': 6398, 'epoch': 3} +{'type': 'loss', 'content': 0.00015465223987121135, 'timestamp': '2025-09-10 02:53:29.448456', 'step': 6399, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:53:29.501559', 'step': 6399, 'epoch': 3} +{'type': 'loss', 'content': 0.0007834541029296815, 'timestamp': '2025-09-10 02:53:29.507833', 'step': 6400, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:53:29.562621', 'step': 6400, 'epoch': 3} +{'type': 'loss', 'content': 3.472758180578239e-05, 'timestamp': '2025-09-10 02:53:29.564847', 'step': 6401, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:53:29.622952', 'step': 6401, 'epoch': 3} +{'type': 'loss', 'content': 0.008757556788623333, 'timestamp': '2025-09-10 02:53:29.633326', 'step': 6402, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:53:29.687339', 'step': 6402, 'epoch': 3} +{'type': 'loss', 'content': 0.021163642406463623, 'timestamp': '2025-09-10 02:53:29.697008', 'step': 6403, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:53:29.755274', 'step': 6403, 'epoch': 3} +{'type': 'loss', 'content': 8.233611151808873e-05, 'timestamp': '2025-09-10 02:53:29.762698', 'step': 6404, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:53:29.816742', 'step': 6404, 'epoch': 3} +{'type': 'loss', 'content': 0.0006255786283873022, 'timestamp': '2025-09-10 02:53:29.819122', 'step': 6405, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:53:29.873873', 'step': 6405, 'epoch': 3} +{'type': 'loss', 'content': 0.00029943903791718185, 'timestamp': '2025-09-10 02:53:29.876168', 'step': 6406, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:53:29.930397', 'step': 6406, 'epoch': 3} +{'type': 'loss', 'content': 0.00017056668002624065, 'timestamp': '2025-09-10 02:53:29.934987', 'step': 6407, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:53:29.990577', 'step': 6407, 'epoch': 3} +{'type': 'loss', 'content': 0.00011894209455931559, 'timestamp': '2025-09-10 02:53:29.996387', 'step': 6408, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:53:30.048421', 'step': 6408, 'epoch': 3} +{'type': 'loss', 'content': 5.540851270779967e-05, 'timestamp': '2025-09-10 02:53:30.056743', 'step': 6409, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:53:30.110212', 'step': 6409, 'epoch': 3} +{'type': 'loss', 'content': 6.922144530108199e-05, 'timestamp': '2025-09-10 02:53:30.116870', 'step': 6410, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:53:30.171852', 'step': 6410, 'epoch': 3} +{'type': 'loss', 'content': 0.0003103779745288193, 'timestamp': '2025-09-10 02:53:30.180928', 'step': 6411, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:53:30.237944', 'step': 6411, 'epoch': 3} +{'type': 'loss', 'content': 0.00043122057104483247, 'timestamp': '2025-09-10 02:53:30.244966', 'step': 6412, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:53:30.306461', 'step': 6412, 'epoch': 3} +{'type': 'loss', 'content': 0.00034825937473215163, 'timestamp': '2025-09-10 02:53:30.318002', 'step': 6413, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-09-10 02:53:30.386908', 'step': 6413, 'epoch': 3} +{'type': 'loss', 'content': 0.000860480242408812, 'timestamp': '2025-09-10 02:53:30.399437', 'step': 6414, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:53:30.453621', 'step': 6414, 'epoch': 3} +{'type': 'loss', 'content': 0.0005650802631862462, 'timestamp': '2025-09-10 02:53:30.455818', 'step': 6415, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:53:30.508995', 'step': 6415, 'epoch': 3} +{'type': 'loss', 'content': 0.0009750666213221848, 'timestamp': '2025-09-10 02:53:30.515270', 'step': 6416, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:53:30.572175', 'step': 6416, 'epoch': 3} +{'type': 'loss', 'content': 0.0016369502991437912, 'timestamp': '2025-09-10 02:53:30.583389', 'step': 6417, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:53:30.637005', 'step': 6417, 'epoch': 3} +{'type': 'loss', 'content': 6.572590064024553e-05, 'timestamp': '2025-09-10 02:53:30.643479', 'step': 6418, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:53:30.697282', 'step': 6418, 'epoch': 3} +{'type': 'loss', 'content': 4.367561268736608e-05, 'timestamp': '2025-09-10 02:53:30.703600', 'step': 6419, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:53:30.757429', 'step': 6419, 'epoch': 3} +{'type': 'loss', 'content': 0.0013939932687208056, 'timestamp': '2025-09-10 02:53:30.763611', 'step': 6420, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:53:30.819989', 'step': 6420, 'epoch': 3} +{'type': 'loss', 'content': 0.0001453318545827642, 'timestamp': '2025-09-10 02:53:30.822218', 'step': 6421, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:53:30.876569', 'step': 6421, 'epoch': 3} +{'type': 'loss', 'content': 0.0007501809159293771, 'timestamp': '2025-09-10 02:53:30.886363', 'step': 6422, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:53:30.939690', 'step': 6422, 'epoch': 3} +{'type': 'loss', 'content': 0.0007267541368491948, 'timestamp': '2025-09-10 02:53:30.941753', 'step': 6423, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:53:30.994705', 'step': 6423, 'epoch': 3} +{'type': 'loss', 'content': 0.0015470579965040088, 'timestamp': '2025-09-10 02:53:31.001906', 'step': 6424, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:53:31.054506', 'step': 6424, 'epoch': 3} +{'type': 'loss', 'content': 0.04970637708902359, 'timestamp': '2025-09-10 02:53:31.056778', 'step': 6425, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:53:31.109457', 'step': 6425, 'epoch': 3} +{'type': 'loss', 'content': 0.000580643187277019, 'timestamp': '2025-09-10 02:53:31.111694', 'step': 6426, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:53:31.171715', 'step': 6426, 'epoch': 3} +{'type': 'loss', 'content': 0.001214796444401145, 'timestamp': '2025-09-10 02:53:31.182445', 'step': 6427, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:53:31.235524', 'step': 6427, 'epoch': 3} +{'type': 'loss', 'content': 0.0005449260352179408, 'timestamp': '2025-09-10 02:53:31.241164', 'step': 6428, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:53:31.293458', 'step': 6428, 'epoch': 3} +{'type': 'loss', 'content': 0.00018538626318331808, 'timestamp': '2025-09-10 02:53:31.295517', 'step': 6429, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:53:31.348071', 'step': 6429, 'epoch': 3} +{'type': 'loss', 'content': 0.0002393820323050022, 'timestamp': '2025-09-10 02:53:31.351277', 'step': 6430, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:53:31.404812', 'step': 6430, 'epoch': 3} +{'type': 'loss', 'content': 0.00010630719771143049, 'timestamp': '2025-09-10 02:53:31.406983', 'step': 6431, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 528], 'flops': 10560064173120.0}, 'timestamp': '2025-09-10 02:53:31.487130', 'step': 6431, 'epoch': 3} +{'type': 'loss', 'content': 0.0002240373141830787, 'timestamp': '2025-09-10 02:53:31.502992', 'step': 6432, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:53:31.555977', 'step': 6432, 'epoch': 3} +{'type': 'loss', 'content': 0.0007287138141691685, 'timestamp': '2025-09-10 02:53:31.558189', 'step': 6433, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:53:31.612876', 'step': 6433, 'epoch': 3} +{'type': 'loss', 'content': 1.3637529264087789e-05, 'timestamp': '2025-09-10 02:53:31.622673', 'step': 6434, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:31.674781', 'step': 6434, 'epoch': 3} +{'type': 'loss', 'content': 0.0012591794366016984, 'timestamp': '2025-09-10 02:53:31.676868', 'step': 6435, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:53:31.729778', 'step': 6435, 'epoch': 3} +{'type': 'loss', 'content': 0.0013732012594118714, 'timestamp': '2025-09-10 02:53:31.738829', 'step': 6436, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:53:31.791486', 'step': 6436, 'epoch': 3} +{'type': 'loss', 'content': 0.03234048932790756, 'timestamp': '2025-09-10 02:53:31.793562', 'step': 6437, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-09-10 02:53:31.855184', 'step': 6437, 'epoch': 3} +{'type': 'loss', 'content': 0.0011566213797777891, 'timestamp': '2025-09-10 02:53:31.866266', 'step': 6438, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:53:31.919785', 'step': 6438, 'epoch': 3} +{'type': 'loss', 'content': 0.04029754176735878, 'timestamp': '2025-09-10 02:53:31.926311', 'step': 6439, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:53:31.979264', 'step': 6439, 'epoch': 3} +{'type': 'loss', 'content': 0.011032062582671642, 'timestamp': '2025-09-10 02:53:31.986131', 'step': 6440, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:53:32.046013', 'step': 6440, 'epoch': 3} +{'type': 'loss', 'content': 5.85177076573018e-05, 'timestamp': '2025-09-10 02:53:32.057572', 'step': 6441, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:53:32.110386', 'step': 6441, 'epoch': 3} +{'type': 'loss', 'content': 0.043669749051332474, 'timestamp': '2025-09-10 02:53:32.113633', 'step': 6442, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:53:32.166240', 'step': 6442, 'epoch': 3} +{'type': 'loss', 'content': 0.031538255512714386, 'timestamp': '2025-09-10 02:53:32.168513', 'step': 6443, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:53:32.221582', 'step': 6443, 'epoch': 3} +{'type': 'loss', 'content': 0.00021350120368879288, 'timestamp': '2025-09-10 02:53:32.227113', 'step': 6444, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:53:32.279363', 'step': 6444, 'epoch': 3} +{'type': 'loss', 'content': 0.0003119015309493989, 'timestamp': '2025-09-10 02:53:32.281470', 'step': 6445, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:53:32.334068', 'step': 6445, 'epoch': 3} +{'type': 'loss', 'content': 0.00018298991199117154, 'timestamp': '2025-09-10 02:53:32.336274', 'step': 6446, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-09-10 02:53:32.404626', 'step': 6446, 'epoch': 3} +{'type': 'loss', 'content': 0.0034401267766952515, 'timestamp': '2025-09-10 02:53:32.417251', 'step': 6447, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:53:32.470032', 'step': 6447, 'epoch': 3} +{'type': 'loss', 'content': 0.0014745743246749043, 'timestamp': '2025-09-10 02:53:32.475506', 'step': 6448, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:53:32.527662', 'step': 6448, 'epoch': 3} +{'type': 'loss', 'content': 0.02622535452246666, 'timestamp': '2025-09-10 02:53:32.530742', 'step': 6449, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:53:32.583908', 'step': 6449, 'epoch': 3} +{'type': 'loss', 'content': 0.0010449716355651617, 'timestamp': '2025-09-10 02:53:32.586040', 'step': 6450, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:53:32.639067', 'step': 6450, 'epoch': 3} +{'type': 'loss', 'content': 0.003003651276230812, 'timestamp': '2025-09-10 02:53:32.641094', 'step': 6451, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:53:32.694057', 'step': 6451, 'epoch': 3} +{'type': 'loss', 'content': 0.0013862905325368047, 'timestamp': '2025-09-10 02:53:32.699713', 'step': 6452, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:53:32.756357', 'step': 6452, 'epoch': 3} +{'type': 'loss', 'content': 0.0014461170649155974, 'timestamp': '2025-09-10 02:53:32.767540', 'step': 6453, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:53:32.820991', 'step': 6453, 'epoch': 3} +{'type': 'loss', 'content': 0.005308613646775484, 'timestamp': '2025-09-10 02:53:32.823690', 'step': 6454, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:53:32.876856', 'step': 6454, 'epoch': 3} +{'type': 'loss', 'content': 0.0005282104248180985, 'timestamp': '2025-09-10 02:53:32.879608', 'step': 6455, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:53:32.933009', 'step': 6455, 'epoch': 3} +{'type': 'loss', 'content': 0.0011777032632380724, 'timestamp': '2025-09-10 02:53:32.938615', 'step': 6456, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 608], 'flops': 12160073886080.0}, 'timestamp': '2025-09-10 02:53:33.027976', 'step': 6456, 'epoch': 3} +{'type': 'loss', 'content': 0.011136236600577831, 'timestamp': '2025-09-10 02:53:33.046709', 'step': 6457, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:53:33.100441', 'step': 6457, 'epoch': 3} +{'type': 'loss', 'content': 0.016885899007320404, 'timestamp': '2025-09-10 02:53:33.102565', 'step': 6458, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:53:33.155816', 'step': 6458, 'epoch': 3} +{'type': 'loss', 'content': 0.00022576468472834677, 'timestamp': '2025-09-10 02:53:33.165371', 'step': 6459, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:53:33.218626', 'step': 6459, 'epoch': 3} +{'type': 'loss', 'content': 0.0009160241461358964, 'timestamp': '2025-09-10 02:53:33.224381', 'step': 6460, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:53:33.277079', 'step': 6460, 'epoch': 3} +{'type': 'loss', 'content': 0.0003347193996887654, 'timestamp': '2025-09-10 02:53:33.279281', 'step': 6461, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:53:33.332252', 'step': 6461, 'epoch': 3} +{'type': 'loss', 'content': 0.00019199053349439055, 'timestamp': '2025-09-10 02:53:33.334564', 'step': 6462, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:53:33.387522', 'step': 6462, 'epoch': 3} +{'type': 'loss', 'content': 0.007015415467321873, 'timestamp': '2025-09-10 02:53:33.389897', 'step': 6463, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:53:33.443171', 'step': 6463, 'epoch': 3} +{'type': 'loss', 'content': 0.00020852791203651577, 'timestamp': '2025-09-10 02:53:33.449147', 'step': 6464, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 448], 'flops': 8960054460160.0}, 'timestamp': '2025-09-10 02:53:33.517129', 'step': 6464, 'epoch': 3} +{'type': 'loss', 'content': 0.001491163275204599, 'timestamp': '2025-09-10 02:53:33.531061', 'step': 6465, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:53:33.591819', 'step': 6465, 'epoch': 3} +{'type': 'loss', 'content': 0.0005892434855923057, 'timestamp': '2025-09-10 02:53:33.602549', 'step': 6466, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:53:33.655691', 'step': 6466, 'epoch': 3} +{'type': 'loss', 'content': 0.0015197346219792962, 'timestamp': '2025-09-10 02:53:33.661979', 'step': 6467, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:53:33.715113', 'step': 6467, 'epoch': 3} +{'type': 'loss', 'content': 0.009250625967979431, 'timestamp': '2025-09-10 02:53:33.722366', 'step': 6468, 'epoch': 3} +{'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 736], 'batch_size': 8, 'flops': 14691612894976}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10859018244352}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 624], 'batch_size': 8, 'flops': 12455932682112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11497784019456}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8623338031488}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8623338031488}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10859018244352}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 656], 'batch_size': 8, 'flops': 13094698457216}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 560], 'batch_size': 8, 'flops': 11178401131904}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11497784019456}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 1168], 'batch_size': 8, 'flops': 23314950858880}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 640], 'batch_size': 8, 'flops': 12775315569664}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [6, 208], 'batch_size': 8, 'flops': 4151977605760}], 'timestamp': '2025-09-10 02:53:50.834662', 'step': 6468, 'epoch': 3} +{'type': 'pplx', 'content': 23706738.432858456, 'timestamp': '2025-09-10 02:53:50.837225', 'step': 6468, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:53:50.892499', 'step': 6468, 'epoch': 3} +{'type': 'loss', 'content': 0.000517043168656528, 'timestamp': '2025-09-10 02:53:50.894641', 'step': 6469, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:53:50.948793', 'step': 6469, 'epoch': 3} +{'type': 'loss', 'content': 0.0028855192940682173, 'timestamp': '2025-09-10 02:53:50.950978', 'step': 6470, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:53:51.004319', 'step': 6470, 'epoch': 3} +{'type': 'loss', 'content': 0.001393310958519578, 'timestamp': '2025-09-10 02:53:51.006881', 'step': 6471, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:53:51.062842', 'step': 6471, 'epoch': 3} +{'type': 'loss', 'content': 0.0002278887404827401, 'timestamp': '2025-09-10 02:53:51.069151', 'step': 6472, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:53:51.121882', 'step': 6472, 'epoch': 3} +{'type': 'loss', 'content': 0.002905389526858926, 'timestamp': '2025-09-10 02:53:51.124080', 'step': 6473, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:53:51.177778', 'step': 6473, 'epoch': 3} +{'type': 'loss', 'content': 0.00012196651368867606, 'timestamp': '2025-09-10 02:53:51.185662', 'step': 6474, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:53:51.239664', 'step': 6474, 'epoch': 3} +{'type': 'loss', 'content': 0.0014588789781555533, 'timestamp': '2025-09-10 02:53:51.241866', 'step': 6475, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:53:51.297712', 'step': 6475, 'epoch': 3} +{'type': 'loss', 'content': 0.0007198529201559722, 'timestamp': '2025-09-10 02:53:51.308131', 'step': 6476, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:53:51.361682', 'step': 6476, 'epoch': 3} +{'type': 'loss', 'content': 0.0008955709636211395, 'timestamp': '2025-09-10 02:53:51.363975', 'step': 6477, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:53:51.417302', 'step': 6477, 'epoch': 3} +{'type': 'loss', 'content': 0.000317004305543378, 'timestamp': '2025-09-10 02:53:51.423754', 'step': 6478, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:53:51.479617', 'step': 6478, 'epoch': 3} +{'type': 'loss', 'content': 0.0010838760063052177, 'timestamp': '2025-09-10 02:53:51.481716', 'step': 6479, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:53:51.535085', 'step': 6479, 'epoch': 3} +{'type': 'loss', 'content': 0.0011980189010500908, 'timestamp': '2025-09-10 02:53:51.541155', 'step': 6480, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:53:51.594604', 'step': 6480, 'epoch': 3} +{'type': 'loss', 'content': 0.007219035644084215, 'timestamp': '2025-09-10 02:53:51.600857', 'step': 6481, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:53:51.653795', 'step': 6481, 'epoch': 3} +{'type': 'loss', 'content': 0.0030097621493041515, 'timestamp': '2025-09-10 02:53:51.656144', 'step': 6482, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:53:51.709990', 'step': 6482, 'epoch': 3} +{'type': 'loss', 'content': 0.0002602596068754792, 'timestamp': '2025-09-10 02:53:51.712035', 'step': 6483, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:53:51.765790', 'step': 6483, 'epoch': 3} +{'type': 'loss', 'content': 0.0004644592700060457, 'timestamp': '2025-09-10 02:53:51.771771', 'step': 6484, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:53:51.825950', 'step': 6484, 'epoch': 3} +{'type': 'loss', 'content': 0.0019527435069903731, 'timestamp': '2025-09-10 02:53:51.828177', 'step': 6485, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:53:51.881260', 'step': 6485, 'epoch': 3} +{'type': 'loss', 'content': 0.00231377431191504, 'timestamp': '2025-09-10 02:53:51.883713', 'step': 6486, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:53:51.937940', 'step': 6486, 'epoch': 3} +{'type': 'loss', 'content': 0.0004013167636003345, 'timestamp': '2025-09-10 02:53:51.947519', 'step': 6487, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:53:52.001129', 'step': 6487, 'epoch': 3} +{'type': 'loss', 'content': 0.00018575329158920795, 'timestamp': '2025-09-10 02:53:52.006994', 'step': 6488, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:53:52.059715', 'step': 6488, 'epoch': 3} +{'type': 'loss', 'content': 0.000344709464116022, 'timestamp': '2025-09-10 02:53:52.062815', 'step': 6489, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:53:52.115937', 'step': 6489, 'epoch': 3} +{'type': 'loss', 'content': 0.0023524740245193243, 'timestamp': '2025-09-10 02:53:52.117982', 'step': 6490, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:53:52.176429', 'step': 6490, 'epoch': 3} +{'type': 'loss', 'content': 0.010268871672451496, 'timestamp': '2025-09-10 02:53:52.186841', 'step': 6491, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:53:52.242105', 'step': 6491, 'epoch': 3} +{'type': 'loss', 'content': 0.0018025030149146914, 'timestamp': '2025-09-10 02:53:52.249911', 'step': 6492, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:53:52.304178', 'step': 6492, 'epoch': 3} +{'type': 'loss', 'content': 0.0006212440202943981, 'timestamp': '2025-09-10 02:53:52.306466', 'step': 6493, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:53:52.360400', 'step': 6493, 'epoch': 3} +{'type': 'loss', 'content': 0.001148225157521665, 'timestamp': '2025-09-10 02:53:52.368321', 'step': 6494, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-09-10 02:53:52.430393', 'step': 6494, 'epoch': 3} +{'type': 'loss', 'content': 0.0004987604916095734, 'timestamp': '2025-09-10 02:53:52.441537', 'step': 6495, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:53:52.496006', 'step': 6495, 'epoch': 3} +{'type': 'loss', 'content': 0.0006656069308519363, 'timestamp': '2025-09-10 02:53:52.504794', 'step': 6496, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:53:52.557690', 'step': 6496, 'epoch': 3} +{'type': 'loss', 'content': 0.0032480843365192413, 'timestamp': '2025-09-10 02:53:52.559975', 'step': 6497, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-09-10 02:53:52.626084', 'step': 6497, 'epoch': 3} +{'type': 'loss', 'content': 0.0003583710640668869, 'timestamp': '2025-09-10 02:53:52.638311', 'step': 6498, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:53:52.692064', 'step': 6498, 'epoch': 3} +{'type': 'loss', 'content': 0.0006013225065544248, 'timestamp': '2025-09-10 02:53:52.700266', 'step': 6499, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:53:52.753263', 'step': 6499, 'epoch': 3} +{'type': 'loss', 'content': 0.00952020101249218, 'timestamp': '2025-09-10 02:53:52.759594', 'step': 6500, 'epoch': 3} +{'type': 'info', 'content': 'Checkpoint saved at step 6500', 'timestamp': '2025-09-10 02:53:53.267713', 'step': 6500, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:53:53.325965', 'step': 6500, 'epoch': 3} +{'type': 'loss', 'content': 0.0012291369494050741, 'timestamp': '2025-09-10 02:53:53.328467', 'step': 6501, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:53.383556', 'step': 6501, 'epoch': 3} +{'type': 'loss', 'content': 0.0013674128567799926, 'timestamp': '2025-09-10 02:53:53.385999', 'step': 6502, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:53:53.439551', 'step': 6502, 'epoch': 3} +{'type': 'loss', 'content': 0.0005135418614372611, 'timestamp': '2025-09-10 02:53:53.441775', 'step': 6503, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:53:53.494717', 'step': 6503, 'epoch': 3} +{'type': 'loss', 'content': 0.00014980064588598907, 'timestamp': '2025-09-10 02:53:53.501293', 'step': 6504, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-09-10 02:53:53.566044', 'step': 6504, 'epoch': 3} +{'type': 'loss', 'content': 0.0026989339385181665, 'timestamp': '2025-09-10 02:53:53.579276', 'step': 6505, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:53:53.634602', 'step': 6505, 'epoch': 3} +{'type': 'loss', 'content': 0.0003520399332046509, 'timestamp': '2025-09-10 02:53:53.640414', 'step': 6506, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-09-10 02:53:53.702245', 'step': 6506, 'epoch': 3} +{'type': 'loss', 'content': 0.0008410373702645302, 'timestamp': '2025-09-10 02:53:53.713362', 'step': 6507, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:53:53.767549', 'step': 6507, 'epoch': 3} +{'type': 'loss', 'content': 0.0025949280243366957, 'timestamp': '2025-09-10 02:53:53.773753', 'step': 6508, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:53:53.826078', 'step': 6508, 'epoch': 3} +{'type': 'loss', 'content': 0.0006233364692889154, 'timestamp': '2025-09-10 02:53:53.828218', 'step': 6509, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:53:53.881248', 'step': 6509, 'epoch': 3} +{'type': 'loss', 'content': 0.003180032828822732, 'timestamp': '2025-09-10 02:53:53.883787', 'step': 6510, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:53:53.937217', 'step': 6510, 'epoch': 3} +{'type': 'loss', 'content': 0.00014026327698957175, 'timestamp': '2025-09-10 02:53:53.939426', 'step': 6511, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:53:53.993281', 'step': 6511, 'epoch': 3} +{'type': 'loss', 'content': 0.0010266329627484083, 'timestamp': '2025-09-10 02:53:53.999728', 'step': 6512, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:53:54.053923', 'step': 6512, 'epoch': 3} +{'type': 'loss', 'content': 0.0014553897781297565, 'timestamp': '2025-09-10 02:53:54.056289', 'step': 6513, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:53:54.111797', 'step': 6513, 'epoch': 3} +{'type': 'loss', 'content': 0.0036328397691249847, 'timestamp': '2025-09-10 02:53:54.121223', 'step': 6514, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:53:54.180006', 'step': 6514, 'epoch': 3} +{'type': 'loss', 'content': 0.004422703292220831, 'timestamp': '2025-09-10 02:53:54.190413', 'step': 6515, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:53:54.249791', 'step': 6515, 'epoch': 3} +{'type': 'loss', 'content': 0.0021767716389149427, 'timestamp': '2025-09-10 02:53:54.261008', 'step': 6516, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:53:54.313849', 'step': 6516, 'epoch': 3} +{'type': 'loss', 'content': 0.00010643379937391728, 'timestamp': '2025-09-10 02:53:54.316118', 'step': 6517, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:53:54.369395', 'step': 6517, 'epoch': 3} +{'type': 'loss', 'content': 0.0007238482357934117, 'timestamp': '2025-09-10 02:53:54.371763', 'step': 6518, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:53:54.425814', 'step': 6518, 'epoch': 3} +{'type': 'loss', 'content': 0.008917992934584618, 'timestamp': '2025-09-10 02:53:54.427952', 'step': 6519, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:53:54.481119', 'step': 6519, 'epoch': 3} +{'type': 'loss', 'content': 7.946386904222891e-05, 'timestamp': '2025-09-10 02:53:54.487176', 'step': 6520, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:53:54.543900', 'step': 6520, 'epoch': 3} +{'type': 'loss', 'content': 0.006381779909133911, 'timestamp': '2025-09-10 02:53:54.555105', 'step': 6521, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:53:54.608324', 'step': 6521, 'epoch': 3} +{'type': 'loss', 'content': 0.00010170700988965109, 'timestamp': '2025-09-10 02:53:54.610838', 'step': 6522, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:53:54.664957', 'step': 6522, 'epoch': 3} +{'type': 'loss', 'content': 0.005070645362138748, 'timestamp': '2025-09-10 02:53:54.673078', 'step': 6523, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:53:54.726884', 'step': 6523, 'epoch': 3} +{'type': 'loss', 'content': 0.0010211608605459332, 'timestamp': '2025-09-10 02:53:54.734117', 'step': 6524, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:53:54.787306', 'step': 6524, 'epoch': 3} +{'type': 'loss', 'content': 0.0017876082565635443, 'timestamp': '2025-09-10 02:53:54.789586', 'step': 6525, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:53:54.845344', 'step': 6525, 'epoch': 3} +{'type': 'loss', 'content': 0.0002271537232445553, 'timestamp': '2025-09-10 02:53:54.854967', 'step': 6526, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:53:54.909215', 'step': 6526, 'epoch': 3} +{'type': 'loss', 'content': 0.0008145435713231564, 'timestamp': '2025-09-10 02:53:54.912946', 'step': 6527, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:53:54.966898', 'step': 6527, 'epoch': 3} +{'type': 'loss', 'content': 0.00027083768509328365, 'timestamp': '2025-09-10 02:53:54.973772', 'step': 6528, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:53:55.026720', 'step': 6528, 'epoch': 3} +{'type': 'loss', 'content': 0.0006037470884621143, 'timestamp': '2025-09-10 02:53:55.036662', 'step': 6529, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:53:55.091448', 'step': 6529, 'epoch': 3} +{'type': 'loss', 'content': 0.00046910453238524497, 'timestamp': '2025-09-10 02:53:55.093972', 'step': 6530, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:53:55.155903', 'step': 6530, 'epoch': 3} +{'type': 'loss', 'content': 0.00018874193483497947, 'timestamp': '2025-09-10 02:53:55.166650', 'step': 6531, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:53:55.220124', 'step': 6531, 'epoch': 3} +{'type': 'loss', 'content': 0.00042390087037347257, 'timestamp': '2025-09-10 02:53:55.227516', 'step': 6532, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:53:55.280902', 'step': 6532, 'epoch': 3} +{'type': 'loss', 'content': 0.009650534018874168, 'timestamp': '2025-09-10 02:53:55.283240', 'step': 6533, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:53:55.336166', 'step': 6533, 'epoch': 3} +{'type': 'loss', 'content': 0.0010299842106178403, 'timestamp': '2025-09-10 02:53:55.344521', 'step': 6534, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:53:55.397568', 'step': 6534, 'epoch': 3} +{'type': 'loss', 'content': 0.0001963161921594292, 'timestamp': '2025-09-10 02:53:55.399901', 'step': 6535, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-09-10 02:53:55.465274', 'step': 6535, 'epoch': 3} +{'type': 'loss', 'content': 0.0001372817496303469, 'timestamp': '2025-09-10 02:53:55.477188', 'step': 6536, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:53:55.529969', 'step': 6536, 'epoch': 3} +{'type': 'loss', 'content': 0.0009262050152756274, 'timestamp': '2025-09-10 02:53:55.532199', 'step': 6537, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:53:55.586892', 'step': 6537, 'epoch': 3} +{'type': 'loss', 'content': 0.0005956540699116886, 'timestamp': '2025-09-10 02:53:55.596696', 'step': 6538, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:53:55.650477', 'step': 6538, 'epoch': 3} +{'type': 'loss', 'content': 0.0005263627972453833, 'timestamp': '2025-09-10 02:53:55.652928', 'step': 6539, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:53:55.706309', 'step': 6539, 'epoch': 3} +{'type': 'loss', 'content': 0.020635077729821205, 'timestamp': '2025-09-10 02:53:55.712474', 'step': 6540, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:53:55.765653', 'step': 6540, 'epoch': 3} +{'type': 'loss', 'content': 0.003491123905405402, 'timestamp': '2025-09-10 02:53:55.768325', 'step': 6541, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:53:55.821497', 'step': 6541, 'epoch': 3} +{'type': 'loss', 'content': 9.034317190526053e-05, 'timestamp': '2025-09-10 02:53:55.823854', 'step': 6542, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:53:55.878186', 'step': 6542, 'epoch': 3} +{'type': 'loss', 'content': 0.00027987625799141824, 'timestamp': '2025-09-10 02:53:55.880570', 'step': 6543, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:53:55.935817', 'step': 6543, 'epoch': 3} +{'type': 'loss', 'content': 0.00014497018128167838, 'timestamp': '2025-09-10 02:53:55.942538', 'step': 6544, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:53:55.995103', 'step': 6544, 'epoch': 3} +{'type': 'loss', 'content': 0.00026463859830982983, 'timestamp': '2025-09-10 02:53:55.997910', 'step': 6545, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:53:56.050998', 'step': 6545, 'epoch': 3} +{'type': 'loss', 'content': 0.003814331954345107, 'timestamp': '2025-09-10 02:53:56.053386', 'step': 6546, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:53:56.106799', 'step': 6546, 'epoch': 3} +{'type': 'loss', 'content': 0.00011513993376865983, 'timestamp': '2025-09-10 02:53:56.109063', 'step': 6547, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:53:56.161700', 'step': 6547, 'epoch': 3} +{'type': 'loss', 'content': 0.00196033320389688, 'timestamp': '2025-09-10 02:53:56.168003', 'step': 6548, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:53:56.220985', 'step': 6548, 'epoch': 3} +{'type': 'loss', 'content': 0.029767900705337524, 'timestamp': '2025-09-10 02:53:56.223596', 'step': 6549, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:53:56.277451', 'step': 6549, 'epoch': 3} +{'type': 'loss', 'content': 0.009712413884699345, 'timestamp': '2025-09-10 02:53:56.283325', 'step': 6550, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:53:56.337370', 'step': 6550, 'epoch': 3} +{'type': 'loss', 'content': 0.0003727537696249783, 'timestamp': '2025-09-10 02:53:56.339519', 'step': 6551, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:53:56.393199', 'step': 6551, 'epoch': 3} +{'type': 'loss', 'content': 0.00031455809948965907, 'timestamp': '2025-09-10 02:53:56.403196', 'step': 6552, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:53:56.457062', 'step': 6552, 'epoch': 3} +{'type': 'loss', 'content': 0.00028856462449766695, 'timestamp': '2025-09-10 02:53:56.459268', 'step': 6553, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:53:56.513072', 'step': 6553, 'epoch': 3} +{'type': 'loss', 'content': 0.00010789121733978391, 'timestamp': '2025-09-10 02:53:56.522698', 'step': 6554, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:53:56.577708', 'step': 6554, 'epoch': 3} +{'type': 'loss', 'content': 0.00018292127060703933, 'timestamp': '2025-09-10 02:53:56.580156', 'step': 6555, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:53:56.634248', 'step': 6555, 'epoch': 3} +{'type': 'loss', 'content': 0.00044557778164744377, 'timestamp': '2025-09-10 02:53:56.640574', 'step': 6556, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:53:56.694428', 'step': 6556, 'epoch': 3} +{'type': 'loss', 'content': 0.0014421871164813638, 'timestamp': '2025-09-10 02:53:56.697109', 'step': 6557, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:53:56.750746', 'step': 6557, 'epoch': 3} +{'type': 'loss', 'content': 0.0059631625190377235, 'timestamp': '2025-09-10 02:53:56.753532', 'step': 6558, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:53:56.807009', 'step': 6558, 'epoch': 3} +{'type': 'loss', 'content': 0.0004969359142705798, 'timestamp': '2025-09-10 02:53:56.809563', 'step': 6559, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:53:56.863096', 'step': 6559, 'epoch': 3} +{'type': 'loss', 'content': 0.0009079108131118119, 'timestamp': '2025-09-10 02:53:56.869690', 'step': 6560, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:53:56.922992', 'step': 6560, 'epoch': 3} +{'type': 'loss', 'content': 0.00012841433635912836, 'timestamp': '2025-09-10 02:53:56.930983', 'step': 6561, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:53:56.992214', 'step': 6561, 'epoch': 3} +{'type': 'loss', 'content': 0.000660736404825002, 'timestamp': '2025-09-10 02:53:57.002941', 'step': 6562, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:53:57.057131', 'step': 6562, 'epoch': 3} +{'type': 'loss', 'content': 0.0005666794604621828, 'timestamp': '2025-09-10 02:53:57.059532', 'step': 6563, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:53:57.113473', 'step': 6563, 'epoch': 3} +{'type': 'loss', 'content': 0.00102478195913136, 'timestamp': '2025-09-10 02:53:57.119778', 'step': 6564, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:53:57.172782', 'step': 6564, 'epoch': 3} +{'type': 'loss', 'content': 0.013485388830304146, 'timestamp': '2025-09-10 02:53:57.175172', 'step': 6565, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-09-10 02:53:57.244015', 'step': 6565, 'epoch': 3} +{'type': 'loss', 'content': 0.0001536666095489636, 'timestamp': '2025-09-10 02:53:57.256582', 'step': 6566, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 848], 'flops': 16960103024960.0}, 'timestamp': '2025-09-10 02:53:57.379965', 'step': 6566, 'epoch': 3} +{'type': 'loss', 'content': 0.0012508033541962504, 'timestamp': '2025-09-10 02:53:57.403939', 'step': 6567, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:53:57.475396', 'step': 6567, 'epoch': 3} +{'type': 'loss', 'content': 0.0001451898569939658, 'timestamp': '2025-09-10 02:53:57.485998', 'step': 6568, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:53:57.552281', 'step': 6568, 'epoch': 3} +{'type': 'loss', 'content': 0.000552977027837187, 'timestamp': '2025-09-10 02:53:57.555219', 'step': 6569, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:53:57.620793', 'step': 6569, 'epoch': 3} +{'type': 'loss', 'content': 0.00020744935318361968, 'timestamp': '2025-09-10 02:53:57.625747', 'step': 6570, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:53:57.686107', 'step': 6570, 'epoch': 3} +{'type': 'loss', 'content': 0.005688023287802935, 'timestamp': '2025-09-10 02:53:57.692337', 'step': 6571, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:53:57.758584', 'step': 6571, 'epoch': 3} +{'type': 'loss', 'content': 0.0023701158352196217, 'timestamp': '2025-09-10 02:53:57.766644', 'step': 6572, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:53:57.843154', 'step': 6572, 'epoch': 3} +{'type': 'loss', 'content': 0.010350550524890423, 'timestamp': '2025-09-10 02:53:57.854683', 'step': 6573, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-09-10 02:53:57.933713', 'step': 6573, 'epoch': 3} +{'type': 'loss', 'content': 0.0031393878161907196, 'timestamp': '2025-09-10 02:53:57.944747', 'step': 6574, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:53:58.009012', 'step': 6574, 'epoch': 3} +{'type': 'loss', 'content': 0.0009653661982156336, 'timestamp': '2025-09-10 02:53:58.014377', 'step': 6575, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 512], 'flops': 10240062230528.0}, 'timestamp': '2025-09-10 02:53:58.102683', 'step': 6575, 'epoch': 3} +{'type': 'loss', 'content': 0.010989164933562279, 'timestamp': '2025-09-10 02:53:58.117488', 'step': 6576, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-09-10 02:53:58.203536', 'step': 6576, 'epoch': 3} +{'type': 'loss', 'content': 6.298122752923518e-05, 'timestamp': '2025-09-10 02:53:58.217276', 'step': 6577, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:53:58.279177', 'step': 6577, 'epoch': 3} +{'type': 'loss', 'content': 0.00017397617921233177, 'timestamp': '2025-09-10 02:53:58.281721', 'step': 6578, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:53:58.344290', 'step': 6578, 'epoch': 3} +{'type': 'loss', 'content': 0.001520134275779128, 'timestamp': '2025-09-10 02:53:58.356292', 'step': 6579, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:53:58.410398', 'step': 6579, 'epoch': 3} +{'type': 'loss', 'content': 0.0001435176673112437, 'timestamp': '2025-09-10 02:53:58.416628', 'step': 6580, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:53:58.469883', 'step': 6580, 'epoch': 3} +{'type': 'loss', 'content': 0.0003124767099507153, 'timestamp': '2025-09-10 02:53:58.477442', 'step': 6581, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:53:58.530975', 'step': 6581, 'epoch': 3} +{'type': 'loss', 'content': 0.003484593238681555, 'timestamp': '2025-09-10 02:53:58.538844', 'step': 6582, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:53:58.593615', 'step': 6582, 'epoch': 3} +{'type': 'loss', 'content': 0.0002820561931002885, 'timestamp': '2025-09-10 02:53:58.595866', 'step': 6583, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:53:58.653779', 'step': 6583, 'epoch': 3} +{'type': 'loss', 'content': 0.0002700116310734302, 'timestamp': '2025-09-10 02:53:58.665025', 'step': 6584, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:53:58.717934', 'step': 6584, 'epoch': 3} +{'type': 'loss', 'content': 4.787747820955701e-05, 'timestamp': '2025-09-10 02:53:58.720079', 'step': 6585, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:53:58.773538', 'step': 6585, 'epoch': 3} +{'type': 'loss', 'content': 0.0011721830815076828, 'timestamp': '2025-09-10 02:53:58.780270', 'step': 6586, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:53:58.833534', 'step': 6586, 'epoch': 3} +{'type': 'loss', 'content': 0.00013192297774367034, 'timestamp': '2025-09-10 02:53:58.835936', 'step': 6587, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:53:58.888556', 'step': 6587, 'epoch': 3} +{'type': 'loss', 'content': 0.00015103282930795103, 'timestamp': '2025-09-10 02:53:58.894623', 'step': 6588, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-09-10 02:53:58.959647', 'step': 6588, 'epoch': 3} +{'type': 'loss', 'content': 0.0002405633422313258, 'timestamp': '2025-09-10 02:53:58.972898', 'step': 6589, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:53:59.026019', 'step': 6589, 'epoch': 3} +{'type': 'loss', 'content': 4.490548599278554e-05, 'timestamp': '2025-09-10 02:53:59.028228', 'step': 6590, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:53:59.082896', 'step': 6590, 'epoch': 3} +{'type': 'loss', 'content': 0.004959744401276112, 'timestamp': '2025-09-10 02:53:59.092632', 'step': 6591, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:53:59.146471', 'step': 6591, 'epoch': 3} +{'type': 'loss', 'content': 0.007809832692146301, 'timestamp': '2025-09-10 02:53:59.153661', 'step': 6592, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:53:59.206902', 'step': 6592, 'epoch': 3} +{'type': 'loss', 'content': 0.008047600276768208, 'timestamp': '2025-09-10 02:53:59.210697', 'step': 6593, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-09-10 02:53:59.279871', 'step': 6593, 'epoch': 3} +{'type': 'loss', 'content': 0.00016849783423822373, 'timestamp': '2025-09-10 02:53:59.292092', 'step': 6594, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:53:59.346093', 'step': 6594, 'epoch': 3} +{'type': 'loss', 'content': 0.0005317169707268476, 'timestamp': '2025-09-10 02:53:59.349891', 'step': 6595, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:53:59.404111', 'step': 6595, 'epoch': 3} +{'type': 'loss', 'content': 0.0018430011114105582, 'timestamp': '2025-09-10 02:53:59.414425', 'step': 6596, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:53:59.479311', 'step': 6596, 'epoch': 3} +{'type': 'loss', 'content': 0.0001944272080436349, 'timestamp': '2025-09-10 02:53:59.490901', 'step': 6597, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:53:59.556393', 'step': 6597, 'epoch': 3} +{'type': 'loss', 'content': 0.00018813603674061596, 'timestamp': '2025-09-10 02:53:59.558670', 'step': 6598, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:53:59.621189', 'step': 6598, 'epoch': 3} +{'type': 'loss', 'content': 0.002565057249739766, 'timestamp': '2025-09-10 02:53:59.629532', 'step': 6599, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:53:59.692916', 'step': 6599, 'epoch': 3} +{'type': 'loss', 'content': 0.0016462865751236677, 'timestamp': '2025-09-10 02:53:59.703519', 'step': 6600, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:53:59.762275', 'step': 6600, 'epoch': 3} +{'type': 'loss', 'content': 0.01904921419918537, 'timestamp': '2025-09-10 02:53:59.764723', 'step': 6601, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:53:59.824371', 'step': 6601, 'epoch': 3} +{'type': 'loss', 'content': 0.003179634688422084, 'timestamp': '2025-09-10 02:53:59.827315', 'step': 6602, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:53:59.885992', 'step': 6602, 'epoch': 3} +{'type': 'loss', 'content': 0.0002889272873289883, 'timestamp': '2025-09-10 02:53:59.889921', 'step': 6603, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:53:59.951085', 'step': 6603, 'epoch': 3} +{'type': 'loss', 'content': 0.0005600472795777023, 'timestamp': '2025-09-10 02:53:59.957700', 'step': 6604, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:54:00.012179', 'step': 6604, 'epoch': 3} +{'type': 'loss', 'content': 0.0005887220031581819, 'timestamp': '2025-09-10 02:54:00.015387', 'step': 6605, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:54:00.072385', 'step': 6605, 'epoch': 3} +{'type': 'loss', 'content': 0.00037788759800605476, 'timestamp': '2025-09-10 02:54:00.081439', 'step': 6606, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:54:00.138928', 'step': 6606, 'epoch': 3} +{'type': 'loss', 'content': 0.0008682940970174968, 'timestamp': '2025-09-10 02:54:00.141920', 'step': 6607, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 656], 'flops': 13120079713856.0}, 'timestamp': '2025-09-10 02:54:00.240132', 'step': 6607, 'epoch': 3} +{'type': 'loss', 'content': 0.0028730365447700024, 'timestamp': '2025-09-10 02:54:00.259434', 'step': 6608, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:54:00.318832', 'step': 6608, 'epoch': 3} +{'type': 'loss', 'content': 0.0013414019485935569, 'timestamp': '2025-09-10 02:54:00.326915', 'step': 6609, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:54:00.383989', 'step': 6609, 'epoch': 3} +{'type': 'loss', 'content': 5.672200131812133e-05, 'timestamp': '2025-09-10 02:54:00.386121', 'step': 6610, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-09-10 02:54:00.454608', 'step': 6610, 'epoch': 3} +{'type': 'loss', 'content': 0.0005330638960003853, 'timestamp': '2025-09-10 02:54:00.467185', 'step': 6611, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:54:00.520130', 'step': 6611, 'epoch': 3} +{'type': 'loss', 'content': 5.573120870394632e-05, 'timestamp': '2025-09-10 02:54:00.525847', 'step': 6612, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:54:00.578308', 'step': 6612, 'epoch': 3} +{'type': 'loss', 'content': 0.0016775779658928514, 'timestamp': '2025-09-10 02:54:00.581389', 'step': 6613, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:54:00.635674', 'step': 6613, 'epoch': 3} +{'type': 'loss', 'content': 3.7891633837716654e-05, 'timestamp': '2025-09-10 02:54:00.645346', 'step': 6614, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-09-10 02:54:00.707046', 'step': 6614, 'epoch': 3} +{'type': 'loss', 'content': 0.0009671809384599328, 'timestamp': '2025-09-10 02:54:00.717966', 'step': 6615, 'epoch': 3} +{'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 736], 'batch_size': 8, 'flops': 14691612894976}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10859018244352}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 624], 'batch_size': 8, 'flops': 12455932682112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11497784019456}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8623338031488}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8623338031488}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10859018244352}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 656], 'batch_size': 8, 'flops': 13094698457216}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 560], 'batch_size': 8, 'flops': 11178401131904}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11497784019456}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 1168], 'batch_size': 8, 'flops': 23314950858880}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 640], 'batch_size': 8, 'flops': 12775315569664}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [6, 208], 'batch_size': 8, 'flops': 4151977605760}], 'timestamp': '2025-09-10 02:54:17.514645', 'step': 6615, 'epoch': 3} +{'type': 'pplx', 'content': 23385001.859677915, 'timestamp': '2025-09-10 02:54:17.517245', 'step': 6615, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:54:17.571958', 'step': 6615, 'epoch': 3} +{'type': 'loss', 'content': 0.0028186680283397436, 'timestamp': '2025-09-10 02:54:17.579820', 'step': 6616, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:54:17.634004', 'step': 6616, 'epoch': 3} +{'type': 'loss', 'content': 0.0005324442172423005, 'timestamp': '2025-09-10 02:54:17.639042', 'step': 6617, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:54:17.700630', 'step': 6617, 'epoch': 3} +{'type': 'loss', 'content': 1.9863709894707426e-05, 'timestamp': '2025-09-10 02:54:17.711371', 'step': 6618, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:54:17.767144', 'step': 6618, 'epoch': 3} +{'type': 'loss', 'content': 0.0036892550997436047, 'timestamp': '2025-09-10 02:54:17.769525', 'step': 6619, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:54:17.826222', 'step': 6619, 'epoch': 3} +{'type': 'loss', 'content': 0.0038833674043416977, 'timestamp': '2025-09-10 02:54:17.832318', 'step': 6620, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:54:17.888929', 'step': 6620, 'epoch': 3} +{'type': 'loss', 'content': 7.060106872813776e-05, 'timestamp': '2025-09-10 02:54:17.899310', 'step': 6621, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:54:17.960237', 'step': 6621, 'epoch': 3} +{'type': 'loss', 'content': 0.03629033640027046, 'timestamp': '2025-09-10 02:54:17.966411', 'step': 6622, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:54:18.020923', 'step': 6622, 'epoch': 3} +{'type': 'loss', 'content': 2.686465268197935e-05, 'timestamp': '2025-09-10 02:54:18.023565', 'step': 6623, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:54:18.079257', 'step': 6623, 'epoch': 3} +{'type': 'loss', 'content': 3.211958028259687e-05, 'timestamp': '2025-09-10 02:54:18.085985', 'step': 6624, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:54:18.139293', 'step': 6624, 'epoch': 3} +{'type': 'loss', 'content': 1.6827079889480956e-05, 'timestamp': '2025-09-10 02:54:18.144760', 'step': 6625, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:54:18.203213', 'step': 6625, 'epoch': 3} +{'type': 'loss', 'content': 0.00018668481789063662, 'timestamp': '2025-09-10 02:54:18.213665', 'step': 6626, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:54:18.267559', 'step': 6626, 'epoch': 3} +{'type': 'loss', 'content': 1.3264459084894042e-05, 'timestamp': '2025-09-10 02:54:18.270255', 'step': 6627, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:54:18.323337', 'step': 6627, 'epoch': 3} +{'type': 'loss', 'content': 0.002850922988727689, 'timestamp': '2025-09-10 02:54:18.329543', 'step': 6628, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:54:18.382284', 'step': 6628, 'epoch': 3} +{'type': 'loss', 'content': 0.00019906852685380727, 'timestamp': '2025-09-10 02:54:18.384789', 'step': 6629, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:54:18.445292', 'step': 6629, 'epoch': 3} +{'type': 'loss', 'content': 0.0008952109492383897, 'timestamp': '2025-09-10 02:54:18.456048', 'step': 6630, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:54:18.511774', 'step': 6630, 'epoch': 3} +{'type': 'loss', 'content': 4.7878238547127694e-05, 'timestamp': '2025-09-10 02:54:18.514132', 'step': 6631, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:54:18.567494', 'step': 6631, 'epoch': 3} +{'type': 'loss', 'content': 3.679122164612636e-05, 'timestamp': '2025-09-10 02:54:18.573778', 'step': 6632, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:54:18.632055', 'step': 6632, 'epoch': 3} +{'type': 'loss', 'content': 0.00027644942747429013, 'timestamp': '2025-09-10 02:54:18.638007', 'step': 6633, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:54:18.696287', 'step': 6633, 'epoch': 3} +{'type': 'loss', 'content': 0.0034692331682890654, 'timestamp': '2025-09-10 02:54:18.698882', 'step': 6634, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:54:18.753258', 'step': 6634, 'epoch': 3} +{'type': 'loss', 'content': 3.929921876988374e-05, 'timestamp': '2025-09-10 02:54:18.755752', 'step': 6635, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-09-10 02:54:18.818136', 'step': 6635, 'epoch': 3} +{'type': 'loss', 'content': 5.747115210397169e-05, 'timestamp': '2025-09-10 02:54:18.830031', 'step': 6636, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:54:18.883677', 'step': 6636, 'epoch': 3} +{'type': 'loss', 'content': 7.895226735854521e-05, 'timestamp': '2025-09-10 02:54:18.886067', 'step': 6637, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:54:18.939694', 'step': 6637, 'epoch': 3} +{'type': 'loss', 'content': 0.0011700527975335717, 'timestamp': '2025-09-10 02:54:18.941934', 'step': 6638, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:54:18.996079', 'step': 6638, 'epoch': 3} +{'type': 'loss', 'content': 9.012048394652084e-05, 'timestamp': '2025-09-10 02:54:19.005679', 'step': 6639, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:54:19.058694', 'step': 6639, 'epoch': 3} +{'type': 'loss', 'content': 0.0004693043010775, 'timestamp': '2025-09-10 02:54:19.064943', 'step': 6640, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:54:19.120542', 'step': 6640, 'epoch': 3} +{'type': 'loss', 'content': 4.708974665845744e-05, 'timestamp': '2025-09-10 02:54:19.127521', 'step': 6641, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:54:19.181188', 'step': 6641, 'epoch': 3} +{'type': 'loss', 'content': 0.00025819733855314553, 'timestamp': '2025-09-10 02:54:19.183849', 'step': 6642, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:54:19.237686', 'step': 6642, 'epoch': 3} +{'type': 'loss', 'content': 8.41783985379152e-05, 'timestamp': '2025-09-10 02:54:19.247203', 'step': 6643, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:54:19.302405', 'step': 6643, 'epoch': 3} +{'type': 'loss', 'content': 0.012563510797917843, 'timestamp': '2025-09-10 02:54:19.312945', 'step': 6644, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:54:19.366179', 'step': 6644, 'epoch': 3} +{'type': 'loss', 'content': 1.19809355965117e-05, 'timestamp': '2025-09-10 02:54:19.372519', 'step': 6645, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:54:19.427087', 'step': 6645, 'epoch': 3} +{'type': 'loss', 'content': 0.00017791613936424255, 'timestamp': '2025-09-10 02:54:19.434638', 'step': 6646, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:54:19.488953', 'step': 6646, 'epoch': 3} +{'type': 'loss', 'content': 7.796345016686246e-05, 'timestamp': '2025-09-10 02:54:19.494754', 'step': 6647, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:54:19.549708', 'step': 6647, 'epoch': 3} +{'type': 'loss', 'content': 0.0002098836557706818, 'timestamp': '2025-09-10 02:54:19.557949', 'step': 6648, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:54:19.612383', 'step': 6648, 'epoch': 3} +{'type': 'loss', 'content': 0.07032088935375214, 'timestamp': '2025-09-10 02:54:19.614568', 'step': 6649, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:54:19.667911', 'step': 6649, 'epoch': 3} +{'type': 'loss', 'content': 0.00012634944869205356, 'timestamp': '2025-09-10 02:54:19.670796', 'step': 6650, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:54:19.727055', 'step': 6650, 'epoch': 3} +{'type': 'loss', 'content': 2.4797018340905197e-05, 'timestamp': '2025-09-10 02:54:19.736662', 'step': 6651, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:54:19.791053', 'step': 6651, 'epoch': 3} +{'type': 'loss', 'content': 7.501683285227045e-05, 'timestamp': '2025-09-10 02:54:19.801374', 'step': 6652, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:54:19.854278', 'step': 6652, 'epoch': 3} +{'type': 'loss', 'content': 0.0016405986389145255, 'timestamp': '2025-09-10 02:54:19.859029', 'step': 6653, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:54:19.914082', 'step': 6653, 'epoch': 3} +{'type': 'loss', 'content': 0.0006561618647538126, 'timestamp': '2025-09-10 02:54:19.916264', 'step': 6654, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:54:19.971736', 'step': 6654, 'epoch': 3} +{'type': 'loss', 'content': 6.329329607979162e-06, 'timestamp': '2025-09-10 02:54:19.973995', 'step': 6655, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:54:20.029901', 'step': 6655, 'epoch': 3} +{'type': 'loss', 'content': 0.00026703623007051647, 'timestamp': '2025-09-10 02:54:20.036791', 'step': 6656, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:54:20.091295', 'step': 6656, 'epoch': 3} +{'type': 'loss', 'content': 7.446396921295673e-06, 'timestamp': '2025-09-10 02:54:20.094056', 'step': 6657, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:54:20.147950', 'step': 6657, 'epoch': 3} +{'type': 'loss', 'content': 0.00018190269474871457, 'timestamp': '2025-09-10 02:54:20.153574', 'step': 6658, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:54:20.207517', 'step': 6658, 'epoch': 3} +{'type': 'loss', 'content': 5.149678327143192e-05, 'timestamp': '2025-09-10 02:54:20.209700', 'step': 6659, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:54:20.263432', 'step': 6659, 'epoch': 3} +{'type': 'loss', 'content': 0.0034816188272088766, 'timestamp': '2025-09-10 02:54:20.270276', 'step': 6660, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:54:20.323098', 'step': 6660, 'epoch': 3} +{'type': 'loss', 'content': 0.00016923309885896742, 'timestamp': '2025-09-10 02:54:20.333057', 'step': 6661, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:54:20.387889', 'step': 6661, 'epoch': 3} +{'type': 'loss', 'content': 0.05583750829100609, 'timestamp': '2025-09-10 02:54:20.390118', 'step': 6662, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:54:20.443551', 'step': 6662, 'epoch': 3} +{'type': 'loss', 'content': 3.884433681378141e-05, 'timestamp': '2025-09-10 02:54:20.445847', 'step': 6663, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:54:20.499357', 'step': 6663, 'epoch': 3} +{'type': 'loss', 'content': 6.830570782767609e-05, 'timestamp': '2025-09-10 02:54:20.505692', 'step': 6664, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:54:20.558413', 'step': 6664, 'epoch': 3} +{'type': 'loss', 'content': 9.02785177459009e-05, 'timestamp': '2025-09-10 02:54:20.560829', 'step': 6665, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:54:20.615081', 'step': 6665, 'epoch': 3} +{'type': 'loss', 'content': 0.0006628134869970381, 'timestamp': '2025-09-10 02:54:20.617328', 'step': 6666, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:54:20.672578', 'step': 6666, 'epoch': 3} +{'type': 'loss', 'content': 0.0002714922302402556, 'timestamp': '2025-09-10 02:54:20.677676', 'step': 6667, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:54:20.732577', 'step': 6667, 'epoch': 3} +{'type': 'loss', 'content': 0.00010888448741752654, 'timestamp': '2025-09-10 02:54:20.739085', 'step': 6668, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:54:20.793092', 'step': 6668, 'epoch': 3} +{'type': 'loss', 'content': 0.0011129614431411028, 'timestamp': '2025-09-10 02:54:20.798856', 'step': 6669, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:54:20.853585', 'step': 6669, 'epoch': 3} +{'type': 'loss', 'content': 2.8284735890338197e-05, 'timestamp': '2025-09-10 02:54:20.857428', 'step': 6670, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-09-10 02:54:20.922145', 'step': 6670, 'epoch': 3} +{'type': 'loss', 'content': 0.0009904785547405481, 'timestamp': '2025-09-10 02:54:20.933081', 'step': 6671, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:54:20.998750', 'step': 6671, 'epoch': 3} +{'type': 'loss', 'content': 0.0010126323904842138, 'timestamp': '2025-09-10 02:54:21.017600', 'step': 6672, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:54:21.075798', 'step': 6672, 'epoch': 3} +{'type': 'loss', 'content': 0.00011522303248057142, 'timestamp': '2025-09-10 02:54:21.081551', 'step': 6673, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:54:21.145979', 'step': 6673, 'epoch': 3} +{'type': 'loss', 'content': 0.006222769618034363, 'timestamp': '2025-09-10 02:54:21.155387', 'step': 6674, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:54:21.215000', 'step': 6674, 'epoch': 3} +{'type': 'loss', 'content': 1.781493665475864e-05, 'timestamp': '2025-09-10 02:54:21.218512', 'step': 6675, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:54:21.282809', 'step': 6675, 'epoch': 3} +{'type': 'loss', 'content': 9.508735092822462e-05, 'timestamp': '2025-09-10 02:54:21.300500', 'step': 6676, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:54:21.378282', 'step': 6676, 'epoch': 3} +{'type': 'loss', 'content': 0.00018511201778892428, 'timestamp': '2025-09-10 02:54:21.388586', 'step': 6677, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:54:21.458911', 'step': 6677, 'epoch': 3} +{'type': 'loss', 'content': 0.0032804924994707108, 'timestamp': '2025-09-10 02:54:21.472931', 'step': 6678, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:54:21.535889', 'step': 6678, 'epoch': 3} +{'type': 'loss', 'content': 0.004024840425699949, 'timestamp': '2025-09-10 02:54:21.548459', 'step': 6679, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:54:21.611605', 'step': 6679, 'epoch': 3} +{'type': 'loss', 'content': 1.6929701814660802e-05, 'timestamp': '2025-09-10 02:54:21.624378', 'step': 6680, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:54:21.689518', 'step': 6680, 'epoch': 3} +{'type': 'loss', 'content': 0.0008354577585123479, 'timestamp': '2025-09-10 02:54:21.701972', 'step': 6681, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:54:21.759701', 'step': 6681, 'epoch': 3} +{'type': 'loss', 'content': 5.089729529572651e-05, 'timestamp': '2025-09-10 02:54:21.774658', 'step': 6682, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:54:21.836970', 'step': 6682, 'epoch': 3} +{'type': 'loss', 'content': 0.0003190193383488804, 'timestamp': '2025-09-10 02:54:21.839086', 'step': 6683, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:54:21.892067', 'step': 6683, 'epoch': 3} +{'type': 'loss', 'content': 0.0002202236355515197, 'timestamp': '2025-09-10 02:54:21.898347', 'step': 6684, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:54:21.950857', 'step': 6684, 'epoch': 3} +{'type': 'loss', 'content': 0.006472047418355942, 'timestamp': '2025-09-10 02:54:21.953253', 'step': 6685, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:54:22.007358', 'step': 6685, 'epoch': 3} +{'type': 'loss', 'content': 0.00014502814156003296, 'timestamp': '2025-09-10 02:54:22.016887', 'step': 6686, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:54:22.071215', 'step': 6686, 'epoch': 3} +{'type': 'loss', 'content': 6.736422801623121e-05, 'timestamp': '2025-09-10 02:54:22.073508', 'step': 6687, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-09-10 02:54:22.135631', 'step': 6687, 'epoch': 3} +{'type': 'loss', 'content': 0.034477561712265015, 'timestamp': '2025-09-10 02:54:22.147485', 'step': 6688, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:54:22.200777', 'step': 6688, 'epoch': 3} +{'type': 'loss', 'content': 1.9786597476922907e-05, 'timestamp': '2025-09-10 02:54:22.203128', 'step': 6689, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:54:22.256301', 'step': 6689, 'epoch': 3} +{'type': 'loss', 'content': 4.165489008300938e-05, 'timestamp': '2025-09-10 02:54:22.259129', 'step': 6690, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-09-10 02:54:22.328220', 'step': 6690, 'epoch': 3} +{'type': 'loss', 'content': 0.009426060132682323, 'timestamp': '2025-09-10 02:54:22.340915', 'step': 6691, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:54:22.394691', 'step': 6691, 'epoch': 3} +{'type': 'loss', 'content': 0.0031298561953008175, 'timestamp': '2025-09-10 02:54:22.400819', 'step': 6692, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:54:22.454558', 'step': 6692, 'epoch': 3} +{'type': 'loss', 'content': 6.302199471974745e-05, 'timestamp': '2025-09-10 02:54:22.460965', 'step': 6693, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:54:22.514212', 'step': 6693, 'epoch': 3} +{'type': 'loss', 'content': 4.824464485864155e-05, 'timestamp': '2025-09-10 02:54:22.517056', 'step': 6694, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:54:22.570838', 'step': 6694, 'epoch': 3} +{'type': 'loss', 'content': 0.0016505582025274634, 'timestamp': '2025-09-10 02:54:22.573222', 'step': 6695, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:54:22.626707', 'step': 6695, 'epoch': 3} +{'type': 'loss', 'content': 0.00028228361043147743, 'timestamp': '2025-09-10 02:54:22.632817', 'step': 6696, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 448], 'flops': 8960054460160.0}, 'timestamp': '2025-09-10 02:54:22.702093', 'step': 6696, 'epoch': 3} +{'type': 'loss', 'content': 0.0005732628633268178, 'timestamp': '2025-09-10 02:54:22.716098', 'step': 6697, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:54:22.769939', 'step': 6697, 'epoch': 3} +{'type': 'loss', 'content': 0.00011515268124639988, 'timestamp': '2025-09-10 02:54:22.771954', 'step': 6698, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:54:22.825416', 'step': 6698, 'epoch': 3} +{'type': 'loss', 'content': 0.00014832020679023117, 'timestamp': '2025-09-10 02:54:22.827720', 'step': 6699, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:54:22.880848', 'step': 6699, 'epoch': 3} +{'type': 'loss', 'content': 0.00014225866470951587, 'timestamp': '2025-09-10 02:54:22.886804', 'step': 6700, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:54:22.939354', 'step': 6700, 'epoch': 3} +{'type': 'loss', 'content': 0.0028337922412902117, 'timestamp': '2025-09-10 02:54:22.941673', 'step': 6701, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:54:22.994523', 'step': 6701, 'epoch': 3} +{'type': 'loss', 'content': 0.00041280841105617583, 'timestamp': '2025-09-10 02:54:22.996547', 'step': 6702, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:54:23.049596', 'step': 6702, 'epoch': 3} +{'type': 'loss', 'content': 5.0511145673226565e-05, 'timestamp': '2025-09-10 02:54:23.051659', 'step': 6703, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:54:23.104571', 'step': 6703, 'epoch': 3} +{'type': 'loss', 'content': 0.00044902818626724184, 'timestamp': '2025-09-10 02:54:23.111642', 'step': 6704, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:54:23.163809', 'step': 6704, 'epoch': 3} +{'type': 'loss', 'content': 0.0003448054485488683, 'timestamp': '2025-09-10 02:54:23.165820', 'step': 6705, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:54:23.220058', 'step': 6705, 'epoch': 3} +{'type': 'loss', 'content': 0.00020704987400677055, 'timestamp': '2025-09-10 02:54:23.229861', 'step': 6706, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:54:23.283371', 'step': 6706, 'epoch': 3} +{'type': 'loss', 'content': 0.0031007863581180573, 'timestamp': '2025-09-10 02:54:23.285568', 'step': 6707, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:54:23.338640', 'step': 6707, 'epoch': 3} +{'type': 'loss', 'content': 0.00010566863784333691, 'timestamp': '2025-09-10 02:54:23.344726', 'step': 6708, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:54:23.397390', 'step': 6708, 'epoch': 3} +{'type': 'loss', 'content': 0.00044948840513825417, 'timestamp': '2025-09-10 02:54:23.399488', 'step': 6709, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:54:23.452180', 'step': 6709, 'epoch': 3} +{'type': 'loss', 'content': 0.00018663638911675662, 'timestamp': '2025-09-10 02:54:23.454475', 'step': 6710, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:54:23.507460', 'step': 6710, 'epoch': 3} +{'type': 'loss', 'content': 0.00022228548186831176, 'timestamp': '2025-09-10 02:54:23.510410', 'step': 6711, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:54:23.563611', 'step': 6711, 'epoch': 3} +{'type': 'loss', 'content': 0.0004220163100399077, 'timestamp': '2025-09-10 02:54:23.570796', 'step': 6712, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:54:23.623249', 'step': 6712, 'epoch': 3} +{'type': 'loss', 'content': 0.00015388162864837795, 'timestamp': '2025-09-10 02:54:23.625434', 'step': 6713, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:54:23.680377', 'step': 6713, 'epoch': 3} +{'type': 'loss', 'content': 0.002114841714501381, 'timestamp': '2025-09-10 02:54:23.689988', 'step': 6714, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:54:23.744954', 'step': 6714, 'epoch': 3} +{'type': 'loss', 'content': 0.003809299087151885, 'timestamp': '2025-09-10 02:54:23.754734', 'step': 6715, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:54:23.808207', 'step': 6715, 'epoch': 3} +{'type': 'loss', 'content': 0.00018899877613876015, 'timestamp': '2025-09-10 02:54:23.814363', 'step': 6716, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:54:23.866805', 'step': 6716, 'epoch': 3} +{'type': 'loss', 'content': 0.00011735771840903908, 'timestamp': '2025-09-10 02:54:23.869107', 'step': 6717, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:54:23.922364', 'step': 6717, 'epoch': 3} +{'type': 'loss', 'content': 0.003337863367050886, 'timestamp': '2025-09-10 02:54:23.924628', 'step': 6718, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:54:23.978673', 'step': 6718, 'epoch': 3} +{'type': 'loss', 'content': 0.002060003113001585, 'timestamp': '2025-09-10 02:54:23.988474', 'step': 6719, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:54:24.041671', 'step': 6719, 'epoch': 3} +{'type': 'loss', 'content': 0.0036181341856718063, 'timestamp': '2025-09-10 02:54:24.047814', 'step': 6720, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:54:24.101959', 'step': 6720, 'epoch': 3} +{'type': 'loss', 'content': 0.0008899550884962082, 'timestamp': '2025-09-10 02:54:24.103951', 'step': 6721, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:54:24.157114', 'step': 6721, 'epoch': 3} +{'type': 'loss', 'content': 0.0014222814934328198, 'timestamp': '2025-09-10 02:54:24.165016', 'step': 6722, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:54:24.218578', 'step': 6722, 'epoch': 3} +{'type': 'loss', 'content': 0.00038225733442232013, 'timestamp': '2025-09-10 02:54:24.224815', 'step': 6723, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:54:24.278129', 'step': 6723, 'epoch': 3} +{'type': 'loss', 'content': 0.00885496474802494, 'timestamp': '2025-09-10 02:54:24.284369', 'step': 6724, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:54:24.337096', 'step': 6724, 'epoch': 3} +{'type': 'loss', 'content': 0.002544673625379801, 'timestamp': '2025-09-10 02:54:24.339314', 'step': 6725, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:54:24.393043', 'step': 6725, 'epoch': 3} +{'type': 'loss', 'content': 0.0023553676437586546, 'timestamp': '2025-09-10 02:54:24.395330', 'step': 6726, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:54:24.448916', 'step': 6726, 'epoch': 3} +{'type': 'loss', 'content': 0.0001859004405559972, 'timestamp': '2025-09-10 02:54:24.451110', 'step': 6727, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:54:24.505588', 'step': 6727, 'epoch': 3} +{'type': 'loss', 'content': 0.01435794122517109, 'timestamp': '2025-09-10 02:54:24.511726', 'step': 6728, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:54:24.566535', 'step': 6728, 'epoch': 3} +{'type': 'loss', 'content': 7.058423216221854e-05, 'timestamp': '2025-09-10 02:54:24.568981', 'step': 6729, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:54:24.623564', 'step': 6729, 'epoch': 3} +{'type': 'loss', 'content': 9.51773690758273e-05, 'timestamp': '2025-09-10 02:54:24.626097', 'step': 6730, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:54:24.679341', 'step': 6730, 'epoch': 3} +{'type': 'loss', 'content': 0.0003350157930981368, 'timestamp': '2025-09-10 02:54:24.681683', 'step': 6731, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:54:24.735359', 'step': 6731, 'epoch': 3} +{'type': 'loss', 'content': 0.00020194535318296403, 'timestamp': '2025-09-10 02:54:24.741560', 'step': 6732, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:54:24.794207', 'step': 6732, 'epoch': 3} +{'type': 'loss', 'content': 0.0005801634979434311, 'timestamp': '2025-09-10 02:54:24.796273', 'step': 6733, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:54:24.849054', 'step': 6733, 'epoch': 3} +{'type': 'loss', 'content': 7.747808558633551e-05, 'timestamp': '2025-09-10 02:54:24.851383', 'step': 6734, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:54:24.907099', 'step': 6734, 'epoch': 3} +{'type': 'loss', 'content': 0.0017583959270268679, 'timestamp': '2025-09-10 02:54:24.914988', 'step': 6735, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:54:24.976372', 'step': 6735, 'epoch': 3} +{'type': 'loss', 'content': 0.00021081813611090183, 'timestamp': '2025-09-10 02:54:24.987959', 'step': 6736, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:54:25.048637', 'step': 6736, 'epoch': 3} +{'type': 'loss', 'content': 0.0007808614755049348, 'timestamp': '2025-09-10 02:54:25.060164', 'step': 6737, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:54:25.113896', 'step': 6737, 'epoch': 3} +{'type': 'loss', 'content': 1.8432010620017536e-05, 'timestamp': '2025-09-10 02:54:25.116073', 'step': 6738, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:54:25.176039', 'step': 6738, 'epoch': 3} +{'type': 'loss', 'content': 6.287031283136457e-05, 'timestamp': '2025-09-10 02:54:25.186742', 'step': 6739, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:54:25.240629', 'step': 6739, 'epoch': 3} +{'type': 'loss', 'content': 0.016147976741194725, 'timestamp': '2025-09-10 02:54:25.246712', 'step': 6740, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:54:25.299975', 'step': 6740, 'epoch': 3} +{'type': 'loss', 'content': 5.1653467380674556e-05, 'timestamp': '2025-09-10 02:54:25.302338', 'step': 6741, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:54:25.356087', 'step': 6741, 'epoch': 3} +{'type': 'loss', 'content': 0.0005793520831502974, 'timestamp': '2025-09-10 02:54:25.358229', 'step': 6742, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:54:25.411891', 'step': 6742, 'epoch': 3} +{'type': 'loss', 'content': 0.0003996819432359189, 'timestamp': '2025-09-10 02:54:25.414140', 'step': 6743, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:54:25.467442', 'step': 6743, 'epoch': 3} +{'type': 'loss', 'content': 0.021318215876817703, 'timestamp': '2025-09-10 02:54:25.473966', 'step': 6744, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:54:25.526873', 'step': 6744, 'epoch': 3} +{'type': 'loss', 'content': 0.00040453224210068583, 'timestamp': '2025-09-10 02:54:25.529673', 'step': 6745, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:54:25.587648', 'step': 6745, 'epoch': 3} +{'type': 'loss', 'content': 0.0003764012362807989, 'timestamp': '2025-09-10 02:54:25.598072', 'step': 6746, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:54:25.652081', 'step': 6746, 'epoch': 3} +{'type': 'loss', 'content': 0.0010460015619173646, 'timestamp': '2025-09-10 02:54:25.654772', 'step': 6747, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:54:25.709953', 'step': 6747, 'epoch': 3} +{'type': 'loss', 'content': 0.0012851936044171453, 'timestamp': '2025-09-10 02:54:25.720491', 'step': 6748, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:54:25.774641', 'step': 6748, 'epoch': 3} +{'type': 'loss', 'content': 0.00038896952173672616, 'timestamp': '2025-09-10 02:54:25.777007', 'step': 6749, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 528], 'flops': 10560064173120.0}, 'timestamp': '2025-09-10 02:54:25.857007', 'step': 6749, 'epoch': 3} +{'type': 'loss', 'content': 0.0012903818860650063, 'timestamp': '2025-09-10 02:54:25.872009', 'step': 6750, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:54:25.927395', 'step': 6750, 'epoch': 3} +{'type': 'loss', 'content': 0.00016401773609686643, 'timestamp': '2025-09-10 02:54:25.929523', 'step': 6751, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:54:25.987942', 'step': 6751, 'epoch': 3} +{'type': 'loss', 'content': 0.00010856654989765957, 'timestamp': '2025-09-10 02:54:25.999143', 'step': 6752, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:54:26.052313', 'step': 6752, 'epoch': 3} +{'type': 'loss', 'content': 0.0001499974459875375, 'timestamp': '2025-09-10 02:54:26.054663', 'step': 6753, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:54:26.109075', 'step': 6753, 'epoch': 3} +{'type': 'loss', 'content': 0.00016375941049773246, 'timestamp': '2025-09-10 02:54:26.111442', 'step': 6754, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:54:26.165537', 'step': 6754, 'epoch': 3} +{'type': 'loss', 'content': 0.00024932180531322956, 'timestamp': '2025-09-10 02:54:26.167944', 'step': 6755, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-09-10 02:54:26.230016', 'step': 6755, 'epoch': 3} +{'type': 'loss', 'content': 0.00013631509500555694, 'timestamp': '2025-09-10 02:54:26.241686', 'step': 6756, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:54:26.295622', 'step': 6756, 'epoch': 3} +{'type': 'loss', 'content': 4.498567068367265e-05, 'timestamp': '2025-09-10 02:54:26.297958', 'step': 6757, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-09-10 02:54:26.366262', 'step': 6757, 'epoch': 3} +{'type': 'loss', 'content': 0.00039396085776388645, 'timestamp': '2025-09-10 02:54:26.378771', 'step': 6758, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:54:26.437901', 'step': 6758, 'epoch': 3} +{'type': 'loss', 'content': 0.00010092677985085174, 'timestamp': '2025-09-10 02:54:26.448267', 'step': 6759, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:54:26.503708', 'step': 6759, 'epoch': 3} +{'type': 'loss', 'content': 3.924220072804019e-05, 'timestamp': '2025-09-10 02:54:26.510153', 'step': 6760, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:54:26.563193', 'step': 6760, 'epoch': 3} +{'type': 'loss', 'content': 0.006220812443643808, 'timestamp': '2025-09-10 02:54:26.565276', 'step': 6761, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:54:26.618786', 'step': 6761, 'epoch': 3} +{'type': 'loss', 'content': 0.0005389533471316099, 'timestamp': '2025-09-10 02:54:26.625123', 'step': 6762, 'epoch': 3} +{'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 736], 'batch_size': 8, 'flops': 14691612894976}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10859018244352}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 624], 'batch_size': 8, 'flops': 12455932682112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11497784019456}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8623338031488}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8623338031488}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10859018244352}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 656], 'batch_size': 8, 'flops': 13094698457216}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 560], 'batch_size': 8, 'flops': 11178401131904}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11497784019456}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 1168], 'batch_size': 8, 'flops': 23314950858880}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 640], 'batch_size': 8, 'flops': 12775315569664}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [6, 208], 'batch_size': 8, 'flops': 4151977605760}], 'timestamp': '2025-09-10 02:54:43.457599', 'step': 6762, 'epoch': 3} +{'type': 'pplx', 'content': 25709489.340655327, 'timestamp': '2025-09-10 02:54:43.460339', 'step': 6762, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:54:43.514618', 'step': 6762, 'epoch': 3} +{'type': 'loss', 'content': 0.0024434844963252544, 'timestamp': '2025-09-10 02:54:43.516606', 'step': 6763, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:54:43.570014', 'step': 6763, 'epoch': 3} +{'type': 'loss', 'content': 0.0022615003399550915, 'timestamp': '2025-09-10 02:54:43.575873', 'step': 6764, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:54:43.632173', 'step': 6764, 'epoch': 3} +{'type': 'loss', 'content': 0.00029734117561019957, 'timestamp': '2025-09-10 02:54:43.643397', 'step': 6765, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:54:43.704713', 'step': 6765, 'epoch': 3} +{'type': 'loss', 'content': 0.0020704036578536034, 'timestamp': '2025-09-10 02:54:43.719871', 'step': 6766, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:54:43.775942', 'step': 6766, 'epoch': 3} +{'type': 'loss', 'content': 6.4070613916555885e-06, 'timestamp': '2025-09-10 02:54:43.782902', 'step': 6767, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:54:43.850891', 'step': 6767, 'epoch': 3} +{'type': 'loss', 'content': 0.0003174375160597265, 'timestamp': '2025-09-10 02:54:43.862141', 'step': 6768, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:54:43.921109', 'step': 6768, 'epoch': 3} +{'type': 'loss', 'content': 0.0021674581803381443, 'timestamp': '2025-09-10 02:54:43.932634', 'step': 6769, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:54:43.991669', 'step': 6769, 'epoch': 3} +{'type': 'loss', 'content': 0.004266251809895039, 'timestamp': '2025-09-10 02:54:43.997338', 'step': 6770, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:54:44.056415', 'step': 6770, 'epoch': 3} +{'type': 'loss', 'content': 0.0001336360292043537, 'timestamp': '2025-09-10 02:54:44.066066', 'step': 6771, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:54:44.127146', 'step': 6771, 'epoch': 3} +{'type': 'loss', 'content': 0.00016490685811731964, 'timestamp': '2025-09-10 02:54:44.137474', 'step': 6772, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:54:44.209500', 'step': 6772, 'epoch': 3} +{'type': 'loss', 'content': 0.0033168368972837925, 'timestamp': '2025-09-10 02:54:44.233944', 'step': 6773, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:54:44.311417', 'step': 6773, 'epoch': 3} +{'type': 'loss', 'content': 0.0043604555539786816, 'timestamp': '2025-09-10 02:54:44.323862', 'step': 6774, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:54:44.397438', 'step': 6774, 'epoch': 3} +{'type': 'loss', 'content': 7.357293361565098e-05, 'timestamp': '2025-09-10 02:54:44.407080', 'step': 6775, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:54:44.468995', 'step': 6775, 'epoch': 3} +{'type': 'loss', 'content': 6.862401642138138e-05, 'timestamp': '2025-09-10 02:54:44.476295', 'step': 6776, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:54:44.544292', 'step': 6776, 'epoch': 3} +{'type': 'loss', 'content': 0.0001584481360623613, 'timestamp': '2025-09-10 02:54:44.551309', 'step': 6777, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:54:44.606774', 'step': 6777, 'epoch': 3} +{'type': 'loss', 'content': 0.0006461879820562899, 'timestamp': '2025-09-10 02:54:44.609148', 'step': 6778, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:54:44.662048', 'step': 6778, 'epoch': 3} +{'type': 'loss', 'content': 0.0011099242838099599, 'timestamp': '2025-09-10 02:54:44.664165', 'step': 6779, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:54:44.718045', 'step': 6779, 'epoch': 3} +{'type': 'loss', 'content': 0.0007155724451877177, 'timestamp': '2025-09-10 02:54:44.724151', 'step': 6780, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:54:44.776648', 'step': 6780, 'epoch': 3} +{'type': 'loss', 'content': 0.060069210827350616, 'timestamp': '2025-09-10 02:54:44.786951', 'step': 6781, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:54:44.840157', 'step': 6781, 'epoch': 3} +{'type': 'loss', 'content': 4.3121788621647283e-05, 'timestamp': '2025-09-10 02:54:44.849745', 'step': 6782, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:54:44.903540', 'step': 6782, 'epoch': 3} +{'type': 'loss', 'content': 0.0012703310931101441, 'timestamp': '2025-09-10 02:54:44.906073', 'step': 6783, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:54:44.959318', 'step': 6783, 'epoch': 3} +{'type': 'loss', 'content': 2.4759003281360492e-05, 'timestamp': '2025-09-10 02:54:44.968426', 'step': 6784, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:54:45.020785', 'step': 6784, 'epoch': 3} +{'type': 'loss', 'content': 3.132286656182259e-05, 'timestamp': '2025-09-10 02:54:45.023260', 'step': 6785, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:54:45.076094', 'step': 6785, 'epoch': 3} +{'type': 'loss', 'content': 0.0002933169307652861, 'timestamp': '2025-09-10 02:54:45.078193', 'step': 6786, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:54:45.131430', 'step': 6786, 'epoch': 3} +{'type': 'loss', 'content': 0.005814902950078249, 'timestamp': '2025-09-10 02:54:45.133592', 'step': 6787, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:54:45.187016', 'step': 6787, 'epoch': 3} +{'type': 'loss', 'content': 1.8819564502337016e-05, 'timestamp': '2025-09-10 02:54:45.197409', 'step': 6788, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:54:45.256777', 'step': 6788, 'epoch': 3} +{'type': 'loss', 'content': 0.0004507180710788816, 'timestamp': '2025-09-10 02:54:45.268370', 'step': 6789, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:54:45.320792', 'step': 6789, 'epoch': 3} +{'type': 'loss', 'content': 0.00028855472919531167, 'timestamp': '2025-09-10 02:54:45.323134', 'step': 6790, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:54:45.376029', 'step': 6790, 'epoch': 3} +{'type': 'loss', 'content': 1.1611181435000617e-05, 'timestamp': '2025-09-10 02:54:45.378054', 'step': 6791, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:54:45.430429', 'step': 6791, 'epoch': 3} +{'type': 'loss', 'content': 4.1199240513378754e-05, 'timestamp': '2025-09-10 02:54:45.436276', 'step': 6792, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:54:45.487944', 'step': 6792, 'epoch': 3} +{'type': 'loss', 'content': 0.00010968972492264584, 'timestamp': '2025-09-10 02:54:45.491053', 'step': 6793, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 512], 'flops': 10240062230528.0}, 'timestamp': '2025-09-10 02:54:45.566858', 'step': 6793, 'epoch': 3} +{'type': 'loss', 'content': 1.6435873476439156e-05, 'timestamp': '2025-09-10 02:54:45.580946', 'step': 6794, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:54:45.633300', 'step': 6794, 'epoch': 3} +{'type': 'loss', 'content': 0.002485661068931222, 'timestamp': '2025-09-10 02:54:45.636608', 'step': 6795, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:54:45.689316', 'step': 6795, 'epoch': 3} +{'type': 'loss', 'content': 0.03919482231140137, 'timestamp': '2025-09-10 02:54:45.694893', 'step': 6796, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:54:45.746777', 'step': 6796, 'epoch': 3} +{'type': 'loss', 'content': 0.036987531930208206, 'timestamp': '2025-09-10 02:54:45.756993', 'step': 6797, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:54:45.810314', 'step': 6797, 'epoch': 3} +{'type': 'loss', 'content': 0.004275764338672161, 'timestamp': '2025-09-10 02:54:45.812720', 'step': 6798, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-09-10 02:54:45.878715', 'step': 6798, 'epoch': 3} +{'type': 'loss', 'content': 7.209383329609409e-05, 'timestamp': '2025-09-10 02:54:45.890913', 'step': 6799, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:54:45.944968', 'step': 6799, 'epoch': 3} +{'type': 'loss', 'content': 5.097974280943163e-05, 'timestamp': '2025-09-10 02:54:45.950844', 'step': 6800, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:54:46.004181', 'step': 6800, 'epoch': 3} +{'type': 'loss', 'content': 5.338800110621378e-05, 'timestamp': '2025-09-10 02:54:46.006518', 'step': 6801, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-09-10 02:54:46.067939', 'step': 6801, 'epoch': 3} +{'type': 'loss', 'content': 0.00010365008347434923, 'timestamp': '2025-09-10 02:54:46.079051', 'step': 6802, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:54:46.131652', 'step': 6802, 'epoch': 3} +{'type': 'loss', 'content': 0.05696597322821617, 'timestamp': '2025-09-10 02:54:46.134011', 'step': 6803, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:54:46.188195', 'step': 6803, 'epoch': 3} +{'type': 'loss', 'content': 0.0016694695223122835, 'timestamp': '2025-09-10 02:54:46.198757', 'step': 6804, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-09-10 02:54:46.263548', 'step': 6804, 'epoch': 3} +{'type': 'loss', 'content': 7.597422518301755e-05, 'timestamp': '2025-09-10 02:54:46.276786', 'step': 6805, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:54:46.329367', 'step': 6805, 'epoch': 3} +{'type': 'loss', 'content': 0.008031840436160564, 'timestamp': '2025-09-10 02:54:46.331832', 'step': 6806, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-09-10 02:54:46.398241', 'step': 6806, 'epoch': 3} +{'type': 'loss', 'content': 0.0027756125200539827, 'timestamp': '2025-09-10 02:54:46.410488', 'step': 6807, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:54:46.463555', 'step': 6807, 'epoch': 3} +{'type': 'loss', 'content': 0.0013087447732686996, 'timestamp': '2025-09-10 02:54:46.469217', 'step': 6808, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:54:46.521582', 'step': 6808, 'epoch': 3} +{'type': 'loss', 'content': 0.017276279628276825, 'timestamp': '2025-09-10 02:54:46.529925', 'step': 6809, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:54:46.582965', 'step': 6809, 'epoch': 3} +{'type': 'loss', 'content': 0.000820252753328532, 'timestamp': '2025-09-10 02:54:46.585063', 'step': 6810, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:54:46.637940', 'step': 6810, 'epoch': 3} +{'type': 'loss', 'content': 0.00012021789734717458, 'timestamp': '2025-09-10 02:54:46.644396', 'step': 6811, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:54:46.697369', 'step': 6811, 'epoch': 3} +{'type': 'loss', 'content': 0.00021653142175637186, 'timestamp': '2025-09-10 02:54:46.703327', 'step': 6812, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:54:46.755527', 'step': 6812, 'epoch': 3} +{'type': 'loss', 'content': 2.710442822717596e-05, 'timestamp': '2025-09-10 02:54:46.765781', 'step': 6813, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:54:46.819110', 'step': 6813, 'epoch': 3} +{'type': 'loss', 'content': 0.0005414400948211551, 'timestamp': '2025-09-10 02:54:46.827337', 'step': 6814, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:54:46.879965', 'step': 6814, 'epoch': 3} +{'type': 'loss', 'content': 0.00010980067600030452, 'timestamp': '2025-09-10 02:54:46.882124', 'step': 6815, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:54:46.934468', 'step': 6815, 'epoch': 3} +{'type': 'loss', 'content': 8.160889410646632e-05, 'timestamp': '2025-09-10 02:54:46.940302', 'step': 6816, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:54:46.992671', 'step': 6816, 'epoch': 3} +{'type': 'loss', 'content': 0.0015756313223391771, 'timestamp': '2025-09-10 02:54:46.994834', 'step': 6817, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:54:47.047557', 'step': 6817, 'epoch': 3} +{'type': 'loss', 'content': 0.00150921696331352, 'timestamp': '2025-09-10 02:54:47.055856', 'step': 6818, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:54:47.113447', 'step': 6818, 'epoch': 3} +{'type': 'loss', 'content': 0.0003578981850296259, 'timestamp': '2025-09-10 02:54:47.123845', 'step': 6819, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:54:47.176978', 'step': 6819, 'epoch': 3} +{'type': 'loss', 'content': 0.007674887776374817, 'timestamp': '2025-09-10 02:54:47.182779', 'step': 6820, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:54:47.239365', 'step': 6820, 'epoch': 3} +{'type': 'loss', 'content': 0.00012018119014101103, 'timestamp': '2025-09-10 02:54:47.250594', 'step': 6821, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:54:47.303359', 'step': 6821, 'epoch': 3} +{'type': 'loss', 'content': 0.0006332649500109255, 'timestamp': '2025-09-10 02:54:47.305445', 'step': 6822, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:54:47.357966', 'step': 6822, 'epoch': 3} +{'type': 'loss', 'content': 0.00015875112148933113, 'timestamp': '2025-09-10 02:54:47.364368', 'step': 6823, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:54:47.417096', 'step': 6823, 'epoch': 3} +{'type': 'loss', 'content': 0.0028945859521627426, 'timestamp': '2025-09-10 02:54:47.425942', 'step': 6824, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:54:47.482476', 'step': 6824, 'epoch': 3} +{'type': 'loss', 'content': 0.00010640511027304456, 'timestamp': '2025-09-10 02:54:47.493672', 'step': 6825, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:54:47.546368', 'step': 6825, 'epoch': 3} +{'type': 'loss', 'content': 0.0002750347484834492, 'timestamp': '2025-09-10 02:54:47.552871', 'step': 6826, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:54:47.610655', 'step': 6826, 'epoch': 3} +{'type': 'loss', 'content': 0.0017221200978383422, 'timestamp': '2025-09-10 02:54:47.613447', 'step': 6827, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:54:47.667468', 'step': 6827, 'epoch': 3} +{'type': 'loss', 'content': 0.00025323693989776075, 'timestamp': '2025-09-10 02:54:47.673506', 'step': 6828, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:54:47.726256', 'step': 6828, 'epoch': 3} +{'type': 'loss', 'content': 0.0016511676367372274, 'timestamp': '2025-09-10 02:54:47.728336', 'step': 6829, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:54:47.782164', 'step': 6829, 'epoch': 3} +{'type': 'loss', 'content': 0.004770257510244846, 'timestamp': '2025-09-10 02:54:47.784259', 'step': 6830, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:54:47.837576', 'step': 6830, 'epoch': 3} +{'type': 'loss', 'content': 0.00022807701316196471, 'timestamp': '2025-09-10 02:54:47.839917', 'step': 6831, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:54:47.892651', 'step': 6831, 'epoch': 3} +{'type': 'loss', 'content': 0.0015882852021604776, 'timestamp': '2025-09-10 02:54:47.898252', 'step': 6832, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:54:47.950227', 'step': 6832, 'epoch': 3} +{'type': 'loss', 'content': 0.0011089268373325467, 'timestamp': '2025-09-10 02:54:47.952495', 'step': 6833, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:54:48.005620', 'step': 6833, 'epoch': 3} +{'type': 'loss', 'content': 0.01070477720350027, 'timestamp': '2025-09-10 02:54:48.015218', 'step': 6834, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:54:48.068058', 'step': 6834, 'epoch': 3} +{'type': 'loss', 'content': 0.003968815319240093, 'timestamp': '2025-09-10 02:54:48.070244', 'step': 6835, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:54:48.123523', 'step': 6835, 'epoch': 3} +{'type': 'loss', 'content': 0.0012771779438480735, 'timestamp': '2025-09-10 02:54:48.129402', 'step': 6836, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:54:48.184171', 'step': 6836, 'epoch': 3} +{'type': 'loss', 'content': 0.00029552009073086083, 'timestamp': '2025-09-10 02:54:48.190900', 'step': 6837, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:54:48.243804', 'step': 6837, 'epoch': 3} +{'type': 'loss', 'content': 0.0011362083023414016, 'timestamp': '2025-09-10 02:54:48.246168', 'step': 6838, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:54:48.299423', 'step': 6838, 'epoch': 3} +{'type': 'loss', 'content': 0.024538684636354446, 'timestamp': '2025-09-10 02:54:48.307626', 'step': 6839, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:54:48.360714', 'step': 6839, 'epoch': 3} +{'type': 'loss', 'content': 0.0003189076960552484, 'timestamp': '2025-09-10 02:54:48.366478', 'step': 6840, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:54:48.419664', 'step': 6840, 'epoch': 3} +{'type': 'loss', 'content': 0.00022763405286241323, 'timestamp': '2025-09-10 02:54:48.422024', 'step': 6841, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:54:48.476781', 'step': 6841, 'epoch': 3} +{'type': 'loss', 'content': 7.701670256210491e-05, 'timestamp': '2025-09-10 02:54:48.483004', 'step': 6842, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:54:48.535962', 'step': 6842, 'epoch': 3} +{'type': 'loss', 'content': 0.00043326723971404135, 'timestamp': '2025-09-10 02:54:48.538495', 'step': 6843, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:54:48.591310', 'step': 6843, 'epoch': 3} +{'type': 'loss', 'content': 0.0008735000155866146, 'timestamp': '2025-09-10 02:54:48.597296', 'step': 6844, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:54:48.649920', 'step': 6844, 'epoch': 3} +{'type': 'loss', 'content': 6.3418920035474e-05, 'timestamp': '2025-09-10 02:54:48.652075', 'step': 6845, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:54:48.704780', 'step': 6845, 'epoch': 3} +{'type': 'loss', 'content': 0.009570607915520668, 'timestamp': '2025-09-10 02:54:48.711268', 'step': 6846, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:54:48.765939', 'step': 6846, 'epoch': 3} +{'type': 'loss', 'content': 0.00020284978381823748, 'timestamp': '2025-09-10 02:54:48.775752', 'step': 6847, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:54:48.828549', 'step': 6847, 'epoch': 3} +{'type': 'loss', 'content': 9.389665501657873e-05, 'timestamp': '2025-09-10 02:54:48.834622', 'step': 6848, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:54:48.888460', 'step': 6848, 'epoch': 3} +{'type': 'loss', 'content': 0.0005315937451086938, 'timestamp': '2025-09-10 02:54:48.897550', 'step': 6849, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-09-10 02:54:48.964068', 'step': 6849, 'epoch': 3} +{'type': 'loss', 'content': 0.0032117923256009817, 'timestamp': '2025-09-10 02:54:48.976314', 'step': 6850, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-09-10 02:54:49.037264', 'step': 6850, 'epoch': 3} +{'type': 'loss', 'content': 0.00123793154489249, 'timestamp': '2025-09-10 02:54:49.048181', 'step': 6851, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:54:49.101799', 'step': 6851, 'epoch': 3} +{'type': 'loss', 'content': 0.0010277210967615247, 'timestamp': '2025-09-10 02:54:49.107839', 'step': 6852, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:54:49.164667', 'step': 6852, 'epoch': 3} +{'type': 'loss', 'content': 0.001223025843501091, 'timestamp': '2025-09-10 02:54:49.175882', 'step': 6853, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:54:49.229444', 'step': 6853, 'epoch': 3} +{'type': 'loss', 'content': 0.003343747928738594, 'timestamp': '2025-09-10 02:54:49.231884', 'step': 6854, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:54:49.284400', 'step': 6854, 'epoch': 3} +{'type': 'loss', 'content': 0.0002088952751364559, 'timestamp': '2025-09-10 02:54:49.286834', 'step': 6855, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:54:49.339605', 'step': 6855, 'epoch': 3} +{'type': 'loss', 'content': 0.0011284074280411005, 'timestamp': '2025-09-10 02:54:49.346951', 'step': 6856, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:54:49.399147', 'step': 6856, 'epoch': 3} +{'type': 'loss', 'content': 0.0004039173945784569, 'timestamp': '2025-09-10 02:54:49.405935', 'step': 6857, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:54:49.464041', 'step': 6857, 'epoch': 3} +{'type': 'loss', 'content': 0.00048438538215123117, 'timestamp': '2025-09-10 02:54:49.474487', 'step': 6858, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:54:49.527583', 'step': 6858, 'epoch': 3} +{'type': 'loss', 'content': 0.00042181755998171866, 'timestamp': '2025-09-10 02:54:49.529880', 'step': 6859, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:54:49.584969', 'step': 6859, 'epoch': 3} +{'type': 'loss', 'content': 0.0008075033547356725, 'timestamp': '2025-09-10 02:54:49.595546', 'step': 6860, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:54:49.648093', 'step': 6860, 'epoch': 3} +{'type': 'loss', 'content': 0.0011093399953097105, 'timestamp': '2025-09-10 02:54:49.650342', 'step': 6861, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:54:49.703361', 'step': 6861, 'epoch': 3} +{'type': 'loss', 'content': 0.0029837351758033037, 'timestamp': '2025-09-10 02:54:49.705555', 'step': 6862, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:54:49.758221', 'step': 6862, 'epoch': 3} +{'type': 'loss', 'content': 0.0002768567646853626, 'timestamp': '2025-09-10 02:54:49.760456', 'step': 6863, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:54:49.812989', 'step': 6863, 'epoch': 3} +{'type': 'loss', 'content': 0.000653381459414959, 'timestamp': '2025-09-10 02:54:49.818571', 'step': 6864, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-09-10 02:54:49.878019', 'step': 6864, 'epoch': 3} +{'type': 'loss', 'content': 0.002829869044944644, 'timestamp': '2025-09-10 02:54:49.890052', 'step': 6865, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:54:49.943305', 'step': 6865, 'epoch': 3} +{'type': 'loss', 'content': 0.0003227445122320205, 'timestamp': '2025-09-10 02:54:49.945571', 'step': 6866, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:54:49.999926', 'step': 6866, 'epoch': 3} +{'type': 'loss', 'content': 0.001285174279473722, 'timestamp': '2025-09-10 02:54:50.009710', 'step': 6867, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:54:50.062418', 'step': 6867, 'epoch': 3} +{'type': 'loss', 'content': 0.003444342641159892, 'timestamp': '2025-09-10 02:54:50.068115', 'step': 6868, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:54:50.120463', 'step': 6868, 'epoch': 3} +{'type': 'loss', 'content': 0.0028381331358104944, 'timestamp': '2025-09-10 02:54:50.122707', 'step': 6869, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:54:50.175218', 'step': 6869, 'epoch': 3} +{'type': 'loss', 'content': 0.0005986435571685433, 'timestamp': '2025-09-10 02:54:50.177684', 'step': 6870, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-09-10 02:54:50.238692', 'step': 6870, 'epoch': 3} +{'type': 'loss', 'content': 0.0004264956805855036, 'timestamp': '2025-09-10 02:54:50.249594', 'step': 6871, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:54:50.302104', 'step': 6871, 'epoch': 3} +{'type': 'loss', 'content': 0.0006066313362680376, 'timestamp': '2025-09-10 02:54:50.307805', 'step': 6872, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:54:50.359643', 'step': 6872, 'epoch': 3} +{'type': 'loss', 'content': 0.0038845862727612257, 'timestamp': '2025-09-10 02:54:50.361881', 'step': 6873, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:54:50.414169', 'step': 6873, 'epoch': 3} +{'type': 'loss', 'content': 0.00452636880800128, 'timestamp': '2025-09-10 02:54:50.416471', 'step': 6874, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:54:50.470558', 'step': 6874, 'epoch': 3} +{'type': 'loss', 'content': 0.002486585173755884, 'timestamp': '2025-09-10 02:54:50.480150', 'step': 6875, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:54:50.532924', 'step': 6875, 'epoch': 3} +{'type': 'loss', 'content': 0.007920735515654087, 'timestamp': '2025-09-10 02:54:50.538714', 'step': 6876, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:54:50.591224', 'step': 6876, 'epoch': 3} +{'type': 'loss', 'content': 0.0034601124934852123, 'timestamp': '2025-09-10 02:54:50.593364', 'step': 6877, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-09-10 02:54:50.654075', 'step': 6877, 'epoch': 3} +{'type': 'loss', 'content': 0.0003639400820247829, 'timestamp': '2025-09-10 02:54:50.664927', 'step': 6878, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:54:50.717906', 'step': 6878, 'epoch': 3} +{'type': 'loss', 'content': 0.0006268096040003002, 'timestamp': '2025-09-10 02:54:50.720773', 'step': 6879, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:54:50.773184', 'step': 6879, 'epoch': 3} +{'type': 'loss', 'content': 0.0022460625041276217, 'timestamp': '2025-09-10 02:54:50.780479', 'step': 6880, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:54:50.832593', 'step': 6880, 'epoch': 3} +{'type': 'loss', 'content': 0.0001877809118013829, 'timestamp': '2025-09-10 02:54:50.839148', 'step': 6881, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:54:50.892255', 'step': 6881, 'epoch': 3} +{'type': 'loss', 'content': 4.215891021885909e-05, 'timestamp': '2025-09-10 02:54:50.898785', 'step': 6882, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:54:50.951255', 'step': 6882, 'epoch': 3} +{'type': 'loss', 'content': 1.2957565559190698e-05, 'timestamp': '2025-09-10 02:54:50.953388', 'step': 6883, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:54:51.006321', 'step': 6883, 'epoch': 3} +{'type': 'loss', 'content': 0.004247861448675394, 'timestamp': '2025-09-10 02:54:51.012096', 'step': 6884, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:54:51.064589', 'step': 6884, 'epoch': 3} +{'type': 'loss', 'content': 0.0009423012961633503, 'timestamp': '2025-09-10 02:54:51.071130', 'step': 6885, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:54:51.124102', 'step': 6885, 'epoch': 3} +{'type': 'loss', 'content': 0.0381709448993206, 'timestamp': '2025-09-10 02:54:51.126314', 'step': 6886, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:54:51.179485', 'step': 6886, 'epoch': 3} +{'type': 'loss', 'content': 0.0006023383466526866, 'timestamp': '2025-09-10 02:54:51.181598', 'step': 6887, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:54:51.234944', 'step': 6887, 'epoch': 3} +{'type': 'loss', 'content': 0.0022470338735729456, 'timestamp': '2025-09-10 02:54:51.240675', 'step': 6888, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:54:51.293439', 'step': 6888, 'epoch': 3} +{'type': 'loss', 'content': 7.219224789878353e-05, 'timestamp': '2025-09-10 02:54:51.301480', 'step': 6889, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:54:51.355968', 'step': 6889, 'epoch': 3} +{'type': 'loss', 'content': 0.00015506960335187614, 'timestamp': '2025-09-10 02:54:51.365754', 'step': 6890, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:54:51.419019', 'step': 6890, 'epoch': 3} +{'type': 'loss', 'content': 0.0007731267251074314, 'timestamp': '2025-09-10 02:54:51.421198', 'step': 6891, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:54:51.474004', 'step': 6891, 'epoch': 3} +{'type': 'loss', 'content': 0.0004507832054514438, 'timestamp': '2025-09-10 02:54:51.479702', 'step': 6892, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:54:51.531784', 'step': 6892, 'epoch': 3} +{'type': 'loss', 'content': 0.00043676866334863007, 'timestamp': '2025-09-10 02:54:51.533889', 'step': 6893, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:54:51.591693', 'step': 6893, 'epoch': 3} +{'type': 'loss', 'content': 0.00028911096160300076, 'timestamp': '2025-09-10 02:54:51.602060', 'step': 6894, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:54:51.655361', 'step': 6894, 'epoch': 3} +{'type': 'loss', 'content': 0.0006486875936388969, 'timestamp': '2025-09-10 02:54:51.657857', 'step': 6895, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:54:51.710680', 'step': 6895, 'epoch': 3} +{'type': 'loss', 'content': 0.00047094575711525977, 'timestamp': '2025-09-10 02:54:51.716484', 'step': 6896, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:54:51.768601', 'step': 6896, 'epoch': 3} +{'type': 'loss', 'content': 0.013813263736665249, 'timestamp': '2025-09-10 02:54:51.770791', 'step': 6897, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:54:51.823594', 'step': 6897, 'epoch': 3} +{'type': 'loss', 'content': 0.0023737193550914526, 'timestamp': '2025-09-10 02:54:51.830137', 'step': 6898, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:54:51.882663', 'step': 6898, 'epoch': 3} +{'type': 'loss', 'content': 0.0007876521558500826, 'timestamp': '2025-09-10 02:54:51.884928', 'step': 6899, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:54:51.937310', 'step': 6899, 'epoch': 3} +{'type': 'loss', 'content': 0.0002053142961813137, 'timestamp': '2025-09-10 02:54:51.943120', 'step': 6900, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-09-10 02:54:52.002536', 'step': 6900, 'epoch': 3} +{'type': 'loss', 'content': 0.0008010483579710126, 'timestamp': '2025-09-10 02:54:52.014315', 'step': 6901, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:54:52.067103', 'step': 6901, 'epoch': 3} +{'type': 'loss', 'content': 0.00018803616694640368, 'timestamp': '2025-09-10 02:54:52.069257', 'step': 6902, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:54:52.121903', 'step': 6902, 'epoch': 3} +{'type': 'loss', 'content': 9.800043335417286e-05, 'timestamp': '2025-09-10 02:54:52.124025', 'step': 6903, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:54:52.176378', 'step': 6903, 'epoch': 3} +{'type': 'loss', 'content': 0.004912449512630701, 'timestamp': '2025-09-10 02:54:52.181933', 'step': 6904, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 464], 'flops': 9280056402752.0}, 'timestamp': '2025-09-10 02:54:52.252522', 'step': 6904, 'epoch': 3} +{'type': 'loss', 'content': 0.0006704013212583959, 'timestamp': '2025-09-10 02:54:52.267166', 'step': 6905, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:54:52.319700', 'step': 6905, 'epoch': 3} +{'type': 'loss', 'content': 0.00028490772820077837, 'timestamp': '2025-09-10 02:54:52.321853', 'step': 6906, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:54:52.374454', 'step': 6906, 'epoch': 3} +{'type': 'loss', 'content': 0.00011347966938046739, 'timestamp': '2025-09-10 02:54:52.376518', 'step': 6907, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:54:52.431023', 'step': 6907, 'epoch': 3} +{'type': 'loss', 'content': 0.00026673488900996745, 'timestamp': '2025-09-10 02:54:52.441604', 'step': 6908, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:54:52.494169', 'step': 6908, 'epoch': 3} +{'type': 'loss', 'content': 0.00011277222074568272, 'timestamp': '2025-09-10 02:54:52.496318', 'step': 6909, 'epoch': 3} +{'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 736], 'batch_size': 8, 'flops': 14691612894976}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10859018244352}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 624], 'batch_size': 8, 'flops': 12455932682112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11497784019456}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8623338031488}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8623338031488}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10859018244352}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 656], 'batch_size': 8, 'flops': 13094698457216}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 560], 'batch_size': 8, 'flops': 11178401131904}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11497784019456}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 1168], 'batch_size': 8, 'flops': 23314950858880}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 640], 'batch_size': 8, 'flops': 12775315569664}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [6, 208], 'batch_size': 8, 'flops': 4151977605760}], 'timestamp': '2025-09-10 02:55:09.333076', 'step': 6909, 'epoch': 3} +{'type': 'pplx', 'content': 24073172.905026663, 'timestamp': '2025-09-10 02:55:09.335891', 'step': 6909, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:55:09.390325', 'step': 6909, 'epoch': 3} +{'type': 'loss', 'content': 0.004598031286150217, 'timestamp': '2025-09-10 02:55:09.396153', 'step': 6910, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:55:09.450747', 'step': 6910, 'epoch': 3} +{'type': 'loss', 'content': 5.856903226231225e-05, 'timestamp': '2025-09-10 02:55:09.453038', 'step': 6911, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:55:09.506875', 'step': 6911, 'epoch': 3} +{'type': 'loss', 'content': 9.248633432434872e-05, 'timestamp': '2025-09-10 02:55:09.513364', 'step': 6912, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:55:09.565957', 'step': 6912, 'epoch': 3} +{'type': 'loss', 'content': 0.00011987597827101126, 'timestamp': '2025-09-10 02:55:09.568207', 'step': 6913, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:55:09.621891', 'step': 6913, 'epoch': 3} +{'type': 'loss', 'content': 0.0017569736810401082, 'timestamp': '2025-09-10 02:55:09.624152', 'step': 6914, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:55:09.677477', 'step': 6914, 'epoch': 3} +{'type': 'loss', 'content': 5.841868551215157e-05, 'timestamp': '2025-09-10 02:55:09.680091', 'step': 6915, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:55:09.733137', 'step': 6915, 'epoch': 3} +{'type': 'loss', 'content': 1.9759321730816737e-05, 'timestamp': '2025-09-10 02:55:09.739007', 'step': 6916, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:55:09.791683', 'step': 6916, 'epoch': 3} +{'type': 'loss', 'content': 0.004630325827747583, 'timestamp': '2025-09-10 02:55:09.794073', 'step': 6917, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:55:09.847491', 'step': 6917, 'epoch': 3} +{'type': 'loss', 'content': 0.015202411450445652, 'timestamp': '2025-09-10 02:55:09.849559', 'step': 6918, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:55:09.902541', 'step': 6918, 'epoch': 3} +{'type': 'loss', 'content': 1.5344645362347364e-05, 'timestamp': '2025-09-10 02:55:09.904888', 'step': 6919, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:55:09.958603', 'step': 6919, 'epoch': 3} +{'type': 'loss', 'content': 0.0002283619687659666, 'timestamp': '2025-09-10 02:55:09.969006', 'step': 6920, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:55:10.021408', 'step': 6920, 'epoch': 3} +{'type': 'loss', 'content': 0.00020906592544633895, 'timestamp': '2025-09-10 02:55:10.023685', 'step': 6921, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:55:10.077503', 'step': 6921, 'epoch': 3} +{'type': 'loss', 'content': 0.00047006976092234254, 'timestamp': '2025-09-10 02:55:10.083502', 'step': 6922, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:55:10.137343', 'step': 6922, 'epoch': 3} +{'type': 'loss', 'content': 9.800849511520937e-05, 'timestamp': '2025-09-10 02:55:10.139832', 'step': 6923, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:55:10.193459', 'step': 6923, 'epoch': 3} +{'type': 'loss', 'content': 0.0005407427088357508, 'timestamp': '2025-09-10 02:55:10.199521', 'step': 6924, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:55:10.251994', 'step': 6924, 'epoch': 3} +{'type': 'loss', 'content': 8.559710840927437e-05, 'timestamp': '2025-09-10 02:55:10.254974', 'step': 6925, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:55:10.307892', 'step': 6925, 'epoch': 3} +{'type': 'loss', 'content': 0.0008459268137812614, 'timestamp': '2025-09-10 02:55:10.310318', 'step': 6926, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:55:10.364136', 'step': 6926, 'epoch': 3} +{'type': 'loss', 'content': 0.0002495987864676863, 'timestamp': '2025-09-10 02:55:10.366150', 'step': 6927, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:55:10.419306', 'step': 6927, 'epoch': 3} +{'type': 'loss', 'content': 9.783969289856032e-05, 'timestamp': '2025-09-10 02:55:10.425002', 'step': 6928, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:55:10.477626', 'step': 6928, 'epoch': 3} +{'type': 'loss', 'content': 0.0005323676159605384, 'timestamp': '2025-09-10 02:55:10.485607', 'step': 6929, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:55:10.539235', 'step': 6929, 'epoch': 3} +{'type': 'loss', 'content': 0.002948866691440344, 'timestamp': '2025-09-10 02:55:10.542061', 'step': 6930, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:55:10.595925', 'step': 6930, 'epoch': 3} +{'type': 'loss', 'content': 0.009427659213542938, 'timestamp': '2025-09-10 02:55:10.598456', 'step': 6931, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:55:10.652790', 'step': 6931, 'epoch': 3} +{'type': 'loss', 'content': 0.0004973894683644176, 'timestamp': '2025-09-10 02:55:10.658907', 'step': 6932, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:55:10.711998', 'step': 6932, 'epoch': 3} +{'type': 'loss', 'content': 0.011988679878413677, 'timestamp': '2025-09-10 02:55:10.714246', 'step': 6933, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:55:10.767798', 'step': 6933, 'epoch': 3} +{'type': 'loss', 'content': 0.0034859830047935247, 'timestamp': '2025-09-10 02:55:10.777388', 'step': 6934, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:55:10.830896', 'step': 6934, 'epoch': 3} +{'type': 'loss', 'content': 0.009490315802395344, 'timestamp': '2025-09-10 02:55:10.832960', 'step': 6935, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:55:10.885797', 'step': 6935, 'epoch': 3} +{'type': 'loss', 'content': 0.0011611237423494458, 'timestamp': '2025-09-10 02:55:10.891645', 'step': 6936, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:55:10.943960', 'step': 6936, 'epoch': 3} +{'type': 'loss', 'content': 8.195281407097355e-05, 'timestamp': '2025-09-10 02:55:10.945923', 'step': 6937, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:55:10.998381', 'step': 6937, 'epoch': 3} +{'type': 'loss', 'content': 0.00026475408230908215, 'timestamp': '2025-09-10 02:55:11.004942', 'step': 6938, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:55:11.058348', 'step': 6938, 'epoch': 3} +{'type': 'loss', 'content': 8.223136683227494e-05, 'timestamp': '2025-09-10 02:55:11.064673', 'step': 6939, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:55:11.118326', 'step': 6939, 'epoch': 3} +{'type': 'loss', 'content': 0.000286032009171322, 'timestamp': '2025-09-10 02:55:11.128699', 'step': 6940, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:55:11.181675', 'step': 6940, 'epoch': 3} +{'type': 'loss', 'content': 0.0009600855992175639, 'timestamp': '2025-09-10 02:55:11.184032', 'step': 6941, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:55:11.237713', 'step': 6941, 'epoch': 3} +{'type': 'loss', 'content': 2.1028057744842954e-05, 'timestamp': '2025-09-10 02:55:11.239833', 'step': 6942, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:55:11.292249', 'step': 6942, 'epoch': 3} +{'type': 'loss', 'content': 0.00015771265316288918, 'timestamp': '2025-09-10 02:55:11.294532', 'step': 6943, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:55:11.347407', 'step': 6943, 'epoch': 3} +{'type': 'loss', 'content': 0.00014096121594775468, 'timestamp': '2025-09-10 02:55:11.353340', 'step': 6944, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 480], 'flops': 9600058345344.0}, 'timestamp': '2025-09-10 02:55:11.425672', 'step': 6944, 'epoch': 3} +{'type': 'loss', 'content': 0.033413439989089966, 'timestamp': '2025-09-10 02:55:11.440594', 'step': 6945, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:55:11.494746', 'step': 6945, 'epoch': 3} +{'type': 'loss', 'content': 0.0015498296124860644, 'timestamp': '2025-09-10 02:55:11.500522', 'step': 6946, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:55:11.553958', 'step': 6946, 'epoch': 3} +{'type': 'loss', 'content': 0.0028644457925111055, 'timestamp': '2025-09-10 02:55:11.556744', 'step': 6947, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:55:11.610072', 'step': 6947, 'epoch': 3} +{'type': 'loss', 'content': 3.332937558297999e-05, 'timestamp': '2025-09-10 02:55:11.616275', 'step': 6948, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:55:11.668796', 'step': 6948, 'epoch': 3} +{'type': 'loss', 'content': 0.0024751699529588223, 'timestamp': '2025-09-10 02:55:11.671287', 'step': 6949, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-09-10 02:55:11.739622', 'step': 6949, 'epoch': 3} +{'type': 'loss', 'content': 0.0002412964531686157, 'timestamp': '2025-09-10 02:55:11.752229', 'step': 6950, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:55:11.805109', 'step': 6950, 'epoch': 3} +{'type': 'loss', 'content': 3.8502173993038014e-05, 'timestamp': '2025-09-10 02:55:11.807342', 'step': 6951, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:55:11.860416', 'step': 6951, 'epoch': 3} +{'type': 'loss', 'content': 0.00013299663260113448, 'timestamp': '2025-09-10 02:55:11.866285', 'step': 6952, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:55:11.918736', 'step': 6952, 'epoch': 3} +{'type': 'loss', 'content': 0.0008007865981198847, 'timestamp': '2025-09-10 02:55:11.921244', 'step': 6953, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:55:11.975215', 'step': 6953, 'epoch': 3} +{'type': 'loss', 'content': 3.039803050342016e-05, 'timestamp': '2025-09-10 02:55:11.982742', 'step': 6954, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:55:12.037426', 'step': 6954, 'epoch': 3} +{'type': 'loss', 'content': 0.0004602761473506689, 'timestamp': '2025-09-10 02:55:12.039789', 'step': 6955, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:55:12.098261', 'step': 6955, 'epoch': 3} +{'type': 'loss', 'content': 0.0004310185613576323, 'timestamp': '2025-09-10 02:55:12.109467', 'step': 6956, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:55:12.162588', 'step': 6956, 'epoch': 3} +{'type': 'loss', 'content': 0.005257864482700825, 'timestamp': '2025-09-10 02:55:12.164562', 'step': 6957, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:55:12.217449', 'step': 6957, 'epoch': 3} +{'type': 'loss', 'content': 0.000653850962407887, 'timestamp': '2025-09-10 02:55:12.219675', 'step': 6958, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:55:12.272949', 'step': 6958, 'epoch': 3} +{'type': 'loss', 'content': 9.644639067118987e-05, 'timestamp': '2025-09-10 02:55:12.275328', 'step': 6959, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 496], 'flops': 9920060287936.0}, 'timestamp': '2025-09-10 02:55:12.350005', 'step': 6959, 'epoch': 3} +{'type': 'loss', 'content': 0.0005518809193745255, 'timestamp': '2025-09-10 02:55:12.364712', 'step': 6960, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:55:12.417817', 'step': 6960, 'epoch': 3} +{'type': 'loss', 'content': 0.0004413666611071676, 'timestamp': '2025-09-10 02:55:12.424069', 'step': 6961, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:55:12.478290', 'step': 6961, 'epoch': 3} +{'type': 'loss', 'content': 0.0008641178137622774, 'timestamp': '2025-09-10 02:55:12.484593', 'step': 6962, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:55:12.538597', 'step': 6962, 'epoch': 3} +{'type': 'loss', 'content': 0.0025807858910411596, 'timestamp': '2025-09-10 02:55:12.540964', 'step': 6963, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:55:12.594820', 'step': 6963, 'epoch': 3} +{'type': 'loss', 'content': 1.628083919058554e-05, 'timestamp': '2025-09-10 02:55:12.601016', 'step': 6964, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:55:12.653912', 'step': 6964, 'epoch': 3} +{'type': 'loss', 'content': 0.0009217743645422161, 'timestamp': '2025-09-10 02:55:12.656604', 'step': 6965, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 416], 'flops': 8320050574976.0}, 'timestamp': '2025-09-10 02:55:12.724455', 'step': 6965, 'epoch': 3} +{'type': 'loss', 'content': 0.0016400327440351248, 'timestamp': '2025-09-10 02:55:12.737061', 'step': 6966, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:55:12.790206', 'step': 6966, 'epoch': 3} +{'type': 'loss', 'content': 0.0003185332752764225, 'timestamp': '2025-09-10 02:55:12.798166', 'step': 6967, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:55:12.851623', 'step': 6967, 'epoch': 3} +{'type': 'loss', 'content': 0.0008124056039378047, 'timestamp': '2025-09-10 02:55:12.857604', 'step': 6968, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:55:12.910816', 'step': 6968, 'epoch': 3} +{'type': 'loss', 'content': 0.013546434231102467, 'timestamp': '2025-09-10 02:55:12.913294', 'step': 6969, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:55:12.966852', 'step': 6969, 'epoch': 3} +{'type': 'loss', 'content': 0.0016551214503124356, 'timestamp': '2025-09-10 02:55:12.969785', 'step': 6970, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-09-10 02:55:13.030618', 'step': 6970, 'epoch': 3} +{'type': 'loss', 'content': 7.480200292775407e-05, 'timestamp': '2025-09-10 02:55:13.041533', 'step': 6971, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:55:13.094803', 'step': 6971, 'epoch': 3} +{'type': 'loss', 'content': 0.0012193608563393354, 'timestamp': '2025-09-10 02:55:13.100881', 'step': 6972, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:55:13.154543', 'step': 6972, 'epoch': 3} +{'type': 'loss', 'content': 1.0489126907486934e-05, 'timestamp': '2025-09-10 02:55:13.156783', 'step': 6973, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:55:13.209917', 'step': 6973, 'epoch': 3} +{'type': 'loss', 'content': 0.002034224336966872, 'timestamp': '2025-09-10 02:55:13.212382', 'step': 6974, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:55:13.266121', 'step': 6974, 'epoch': 3} +{'type': 'loss', 'content': 0.0006291036261245608, 'timestamp': '2025-09-10 02:55:13.271787', 'step': 6975, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:55:13.326662', 'step': 6975, 'epoch': 3} +{'type': 'loss', 'content': 0.0034870824310928583, 'timestamp': '2025-09-10 02:55:13.333131', 'step': 6976, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-09-10 02:55:13.398463', 'step': 6976, 'epoch': 3} +{'type': 'loss', 'content': 0.002291430253535509, 'timestamp': '2025-09-10 02:55:13.411674', 'step': 6977, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:55:13.466524', 'step': 6977, 'epoch': 3} +{'type': 'loss', 'content': 5.7963301514973864e-05, 'timestamp': '2025-09-10 02:55:13.468840', 'step': 6978, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:55:13.522545', 'step': 6978, 'epoch': 3} +{'type': 'loss', 'content': 0.0013603951083496213, 'timestamp': '2025-09-10 02:55:13.529891', 'step': 6979, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:55:13.588390', 'step': 6979, 'epoch': 3} +{'type': 'loss', 'content': 0.00023517789668403566, 'timestamp': '2025-09-10 02:55:13.599601', 'step': 6980, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:55:13.653341', 'step': 6980, 'epoch': 3} +{'type': 'loss', 'content': 0.00038756770663894713, 'timestamp': '2025-09-10 02:55:13.663094', 'step': 6981, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:55:13.720480', 'step': 6981, 'epoch': 3} +{'type': 'loss', 'content': 1.3510049939213786e-05, 'timestamp': '2025-09-10 02:55:13.723039', 'step': 6982, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:55:13.776891', 'step': 6982, 'epoch': 3} +{'type': 'loss', 'content': 1.902151052490808e-05, 'timestamp': '2025-09-10 02:55:13.786457', 'step': 6983, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:55:13.840148', 'step': 6983, 'epoch': 3} +{'type': 'loss', 'content': 0.02695157565176487, 'timestamp': '2025-09-10 02:55:13.846632', 'step': 6984, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:55:13.903806', 'step': 6984, 'epoch': 3} +{'type': 'loss', 'content': 2.4220677005359903e-05, 'timestamp': '2025-09-10 02:55:13.915046', 'step': 6985, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:55:13.970313', 'step': 6985, 'epoch': 3} +{'type': 'loss', 'content': 7.140782690839842e-05, 'timestamp': '2025-09-10 02:55:13.980137', 'step': 6986, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:55:14.033553', 'step': 6986, 'epoch': 3} +{'type': 'loss', 'content': 0.000857262930367142, 'timestamp': '2025-09-10 02:55:14.035730', 'step': 6987, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:55:14.088780', 'step': 6987, 'epoch': 3} +{'type': 'loss', 'content': 1.702000190562103e-05, 'timestamp': '2025-09-10 02:55:14.095016', 'step': 6988, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:55:14.147885', 'step': 6988, 'epoch': 3} +{'type': 'loss', 'content': 1.4767555512662511e-05, 'timestamp': '2025-09-10 02:55:14.149966', 'step': 6989, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:55:14.204038', 'step': 6989, 'epoch': 3} +{'type': 'loss', 'content': 0.00014243644545786083, 'timestamp': '2025-09-10 02:55:14.206222', 'step': 6990, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:55:14.260411', 'step': 6990, 'epoch': 3} +{'type': 'loss', 'content': 0.00014196685515344143, 'timestamp': '2025-09-10 02:55:14.262779', 'step': 6991, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:55:14.316844', 'step': 6991, 'epoch': 3} +{'type': 'loss', 'content': 0.00042847523582167923, 'timestamp': '2025-09-10 02:55:14.323064', 'step': 6992, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:55:14.376465', 'step': 6992, 'epoch': 3} +{'type': 'loss', 'content': 0.0003668736608233303, 'timestamp': '2025-09-10 02:55:14.378674', 'step': 6993, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:55:14.431918', 'step': 6993, 'epoch': 3} +{'type': 'loss', 'content': 0.00011432624887675047, 'timestamp': '2025-09-10 02:55:14.438385', 'step': 6994, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:55:14.491502', 'step': 6994, 'epoch': 3} +{'type': 'loss', 'content': 7.136528438422829e-05, 'timestamp': '2025-09-10 02:55:14.493868', 'step': 6995, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-09-10 02:55:14.555636', 'step': 6995, 'epoch': 3} +{'type': 'loss', 'content': 0.00809055007994175, 'timestamp': '2025-09-10 02:55:14.567515', 'step': 6996, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-09-10 02:55:14.627633', 'step': 6996, 'epoch': 3} +{'type': 'loss', 'content': 8.337383769685403e-05, 'timestamp': '2025-09-10 02:55:14.639405', 'step': 6997, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:55:14.692953', 'step': 6997, 'epoch': 3} +{'type': 'loss', 'content': 8.763316145632416e-06, 'timestamp': '2025-09-10 02:55:14.695022', 'step': 6998, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 368], 'flops': 7360044747200.0}, 'timestamp': '2025-09-10 02:55:14.755818', 'step': 6998, 'epoch': 3} +{'type': 'loss', 'content': 0.00019043213978875428, 'timestamp': '2025-09-10 02:55:14.766731', 'step': 6999, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:55:14.822155', 'step': 6999, 'epoch': 3} +{'type': 'loss', 'content': 7.073474989738315e-05, 'timestamp': '2025-09-10 02:55:14.828520', 'step': 7000, 'epoch': 3} +{'type': 'info', 'content': 'Checkpoint saved at step 7000', 'timestamp': '2025-09-10 02:55:15.191436', 'step': 7000, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:55:15.248007', 'step': 7000, 'epoch': 3} +{'type': 'loss', 'content': 5.3104588005226105e-05, 'timestamp': '2025-09-10 02:55:15.250321', 'step': 7001, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:55:15.304074', 'step': 7001, 'epoch': 3} +{'type': 'loss', 'content': 5.853674338140991e-06, 'timestamp': '2025-09-10 02:55:15.306225', 'step': 7002, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:55:15.359508', 'step': 7002, 'epoch': 3} +{'type': 'loss', 'content': 0.04805157333612442, 'timestamp': '2025-09-10 02:55:15.361687', 'step': 7003, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:55:15.414620', 'step': 7003, 'epoch': 3} +{'type': 'loss', 'content': 0.00040699567762203515, 'timestamp': '2025-09-10 02:55:15.421047', 'step': 7004, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:55:15.474609', 'step': 7004, 'epoch': 3} +{'type': 'loss', 'content': 3.4563810913823545e-05, 'timestamp': '2025-09-10 02:55:15.485141', 'step': 7005, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:55:15.538890', 'step': 7005, 'epoch': 3} +{'type': 'loss', 'content': 7.641797856194898e-05, 'timestamp': '2025-09-10 02:55:15.540985', 'step': 7006, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:55:15.594812', 'step': 7006, 'epoch': 3} +{'type': 'loss', 'content': 8.418989636993501e-06, 'timestamp': '2025-09-10 02:55:15.596952', 'step': 7007, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:55:15.653036', 'step': 7007, 'epoch': 3} +{'type': 'loss', 'content': 0.00425726268440485, 'timestamp': '2025-09-10 02:55:15.659344', 'step': 7008, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:55:15.712439', 'step': 7008, 'epoch': 3} +{'type': 'loss', 'content': 0.0047266217879951, 'timestamp': '2025-09-10 02:55:15.718601', 'step': 7009, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:55:15.772224', 'step': 7009, 'epoch': 3} +{'type': 'loss', 'content': 0.004258487839251757, 'timestamp': '2025-09-10 02:55:15.774695', 'step': 7010, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-10 02:55:15.827970', 'step': 7010, 'epoch': 3} +{'type': 'loss', 'content': 0.001172769581899047, 'timestamp': '2025-09-10 02:55:15.830465', 'step': 7011, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:55:15.884660', 'step': 7011, 'epoch': 3} +{'type': 'loss', 'content': 0.0013297060504555702, 'timestamp': '2025-09-10 02:55:15.891424', 'step': 7012, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:55:15.944408', 'step': 7012, 'epoch': 3} +{'type': 'loss', 'content': 7.664418080821633e-05, 'timestamp': '2025-09-10 02:55:15.946816', 'step': 7013, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:55:16.000891', 'step': 7013, 'epoch': 3} +{'type': 'loss', 'content': 8.392694871872663e-05, 'timestamp': '2025-09-10 02:55:16.009053', 'step': 7014, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:55:16.062988', 'step': 7014, 'epoch': 3} +{'type': 'loss', 'content': 1.8508600987843238e-05, 'timestamp': '2025-09-10 02:55:16.065254', 'step': 7015, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:55:16.118610', 'step': 7015, 'epoch': 3} +{'type': 'loss', 'content': 1.0045929229818285e-05, 'timestamp': '2025-09-10 02:55:16.124692', 'step': 7016, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:55:16.177581', 'step': 7016, 'epoch': 3} +{'type': 'loss', 'content': 4.157526564085856e-05, 'timestamp': '2025-09-10 02:55:16.185821', 'step': 7017, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:55:16.244377', 'step': 7017, 'epoch': 3} +{'type': 'loss', 'content': 0.0013229567557573318, 'timestamp': '2025-09-10 02:55:16.254816', 'step': 7018, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:55:16.309304', 'step': 7018, 'epoch': 3} +{'type': 'loss', 'content': 0.0016287763137370348, 'timestamp': '2025-09-10 02:55:16.311971', 'step': 7019, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 400], 'flops': 8000048632384.0}, 'timestamp': '2025-09-10 02:55:16.378558', 'step': 7019, 'epoch': 3} +{'type': 'loss', 'content': 0.0005435793427750468, 'timestamp': '2025-09-10 02:55:16.391546', 'step': 7020, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:55:16.444436', 'step': 7020, 'epoch': 3} +{'type': 'loss', 'content': 0.00043971644481644034, 'timestamp': '2025-09-10 02:55:16.447213', 'step': 7021, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 480], 'flops': 9600058345344.0}, 'timestamp': '2025-09-10 02:55:16.521865', 'step': 7021, 'epoch': 3} +{'type': 'loss', 'content': 0.038626980036497116, 'timestamp': '2025-09-10 02:55:16.535558', 'step': 7022, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:55:16.589696', 'step': 7022, 'epoch': 3} +{'type': 'loss', 'content': 0.00037362458533607423, 'timestamp': '2025-09-10 02:55:16.591925', 'step': 7023, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:55:16.645143', 'step': 7023, 'epoch': 3} +{'type': 'loss', 'content': 0.00013430576655082405, 'timestamp': '2025-09-10 02:55:16.651283', 'step': 7024, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:55:16.704171', 'step': 7024, 'epoch': 3} +{'type': 'loss', 'content': 0.058919306844472885, 'timestamp': '2025-09-10 02:55:16.706448', 'step': 7025, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:55:16.759685', 'step': 7025, 'epoch': 3} +{'type': 'loss', 'content': 0.0005710253608413041, 'timestamp': '2025-09-10 02:55:16.761909', 'step': 7026, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:55:16.814670', 'step': 7026, 'epoch': 3} +{'type': 'loss', 'content': 0.00021847318566869944, 'timestamp': '2025-09-10 02:55:16.816853', 'step': 7027, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:55:16.869723', 'step': 7027, 'epoch': 3} +{'type': 'loss', 'content': 0.0014488119632005692, 'timestamp': '2025-09-10 02:55:16.878602', 'step': 7028, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:55:16.931553', 'step': 7028, 'epoch': 3} +{'type': 'loss', 'content': 0.00017375395691487938, 'timestamp': '2025-09-10 02:55:16.938148', 'step': 7029, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:55:16.991885', 'step': 7029, 'epoch': 3} +{'type': 'loss', 'content': 7.632833148818463e-05, 'timestamp': '2025-09-10 02:55:16.994073', 'step': 7030, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-10 02:55:17.048979', 'step': 7030, 'epoch': 3} +{'type': 'loss', 'content': 3.6653014831244946e-05, 'timestamp': '2025-09-10 02:55:17.051328', 'step': 7031, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:55:17.108652', 'step': 7031, 'epoch': 3} +{'type': 'loss', 'content': 0.0006913283723406494, 'timestamp': '2025-09-10 02:55:17.115321', 'step': 7032, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 320], 'flops': 6400038919424.0}, 'timestamp': '2025-09-10 02:55:17.170294', 'step': 7032, 'epoch': 3} +{'type': 'loss', 'content': 0.00040089216781780124, 'timestamp': '2025-09-10 02:55:17.179507', 'step': 7033, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-10 02:55:17.233645', 'step': 7033, 'epoch': 3} +{'type': 'loss', 'content': 0.0007994318730197847, 'timestamp': '2025-09-10 02:55:17.235844', 'step': 7034, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:55:17.289023', 'step': 7034, 'epoch': 3} +{'type': 'loss', 'content': 0.009965775534510612, 'timestamp': '2025-09-10 02:55:17.291661', 'step': 7035, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:55:17.351879', 'step': 7035, 'epoch': 3} +{'type': 'loss', 'content': 0.0024551197420805693, 'timestamp': '2025-09-10 02:55:17.363423', 'step': 7036, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:55:17.422979', 'step': 7036, 'epoch': 3} +{'type': 'loss', 'content': 0.00020564479927998036, 'timestamp': '2025-09-10 02:55:17.434563', 'step': 7037, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:55:17.487405', 'step': 7037, 'epoch': 3} +{'type': 'loss', 'content': 0.00021126912906765938, 'timestamp': '2025-09-10 02:55:17.489579', 'step': 7038, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:55:17.544097', 'step': 7038, 'epoch': 3} +{'type': 'loss', 'content': 0.0007856762385927141, 'timestamp': '2025-09-10 02:55:17.552167', 'step': 7039, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:55:17.605563', 'step': 7039, 'epoch': 3} +{'type': 'loss', 'content': 0.0001274820970138535, 'timestamp': '2025-09-10 02:55:17.612830', 'step': 7040, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 384], 'flops': 7680046689792.0}, 'timestamp': '2025-09-10 02:55:17.672886', 'step': 7040, 'epoch': 3} +{'type': 'loss', 'content': 0.003535389667376876, 'timestamp': '2025-09-10 02:55:17.684883', 'step': 7041, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 464], 'flops': 9280056402752.0}, 'timestamp': '2025-09-10 02:55:17.758057', 'step': 7041, 'epoch': 3} +{'type': 'loss', 'content': 0.012710364535450935, 'timestamp': '2025-09-10 02:55:17.771541', 'step': 7042, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:55:17.825197', 'step': 7042, 'epoch': 3} +{'type': 'loss', 'content': 0.0010614634957164526, 'timestamp': '2025-09-10 02:55:17.827335', 'step': 7043, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:55:17.880128', 'step': 7043, 'epoch': 3} +{'type': 'loss', 'content': 0.00012138697638874874, 'timestamp': '2025-09-10 02:55:17.885939', 'step': 7044, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:55:17.942902', 'step': 7044, 'epoch': 3} +{'type': 'loss', 'content': 9.633704394218512e-06, 'timestamp': '2025-09-10 02:55:17.954099', 'step': 7045, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-10 02:55:18.008105', 'step': 7045, 'epoch': 3} +{'type': 'loss', 'content': 0.0007055842434056103, 'timestamp': '2025-09-10 02:55:18.014439', 'step': 7046, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:55:18.069170', 'step': 7046, 'epoch': 3} +{'type': 'loss', 'content': 2.52599420491606e-05, 'timestamp': '2025-09-10 02:55:18.071302', 'step': 7047, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 304], 'flops': 6080036976832.0}, 'timestamp': '2025-09-10 02:55:18.125948', 'step': 7047, 'epoch': 3} +{'type': 'loss', 'content': 0.00012941339809913188, 'timestamp': '2025-09-10 02:55:18.136344', 'step': 7048, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:55:18.189350', 'step': 7048, 'epoch': 3} +{'type': 'loss', 'content': 0.0006682535749860108, 'timestamp': '2025-09-10 02:55:18.191467', 'step': 7049, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:55:18.244383', 'step': 7049, 'epoch': 3} +{'type': 'loss', 'content': 0.00012835065717808902, 'timestamp': '2025-09-10 02:55:18.246650', 'step': 7050, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-10 02:55:18.299891', 'step': 7050, 'epoch': 3} +{'type': 'loss', 'content': 4.6844728785799816e-05, 'timestamp': '2025-09-10 02:55:18.301962', 'step': 7051, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 464], 'flops': 9280056402752.0}, 'timestamp': '2025-09-10 02:55:18.375534', 'step': 7051, 'epoch': 3} +{'type': 'loss', 'content': 7.747875497443601e-05, 'timestamp': '2025-09-10 02:55:18.389709', 'step': 7052, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-10 02:55:18.443197', 'step': 7052, 'epoch': 3} +{'type': 'loss', 'content': 0.000303348817396909, 'timestamp': '2025-09-10 02:55:18.445500', 'step': 7053, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:55:18.499880', 'step': 7053, 'epoch': 3} +{'type': 'loss', 'content': 0.0007727089687250555, 'timestamp': '2025-09-10 02:55:18.502413', 'step': 7054, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 336], 'flops': 6720040862016.0}, 'timestamp': '2025-09-10 02:55:18.560617', 'step': 7054, 'epoch': 3} +{'type': 'loss', 'content': 0.0004567324067465961, 'timestamp': '2025-09-10 02:55:18.571032', 'step': 7055, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 448], 'flops': 8960054460160.0}, 'timestamp': '2025-09-10 02:55:18.641154', 'step': 7055, 'epoch': 3} +{'type': 'loss', 'content': 0.030527567490935326, 'timestamp': '2025-09-10 02:55:18.654786', 'step': 7056, 'epoch': 3} +{'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 736], 'batch_size': 8, 'flops': 14691612894976}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10859018244352}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 624], 'batch_size': 8, 'flops': 12455932682112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11497784019456}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8623338031488}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8623338031488}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10859018244352}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 656], 'batch_size': 8, 'flops': 13094698457216}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 560], 'batch_size': 8, 'flops': 11178401131904}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11497784019456}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 1168], 'batch_size': 8, 'flops': 23314950858880}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 640], 'batch_size': 8, 'flops': 12775315569664}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [6, 208], 'batch_size': 8, 'flops': 4151977605760}], 'timestamp': '2025-09-10 02:55:35.608707', 'step': 7056, 'epoch': 3} +{'type': 'pplx', 'content': 18865554.804102868, 'timestamp': '2025-09-10 02:55:35.611908', 'step': 7056, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:55:35.665244', 'step': 7056, 'epoch': 3} +{'type': 'loss', 'content': 0.00037813952076248825, 'timestamp': '2025-09-10 02:55:35.668899', 'step': 7057, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:55:35.725023', 'step': 7057, 'epoch': 3} +{'type': 'loss', 'content': 0.0026767307426780462, 'timestamp': '2025-09-10 02:55:35.727564', 'step': 7058, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 256], 'flops': 5120031149056.0}, 'timestamp': '2025-09-10 02:55:35.780593', 'step': 7058, 'epoch': 3} +{'type': 'loss', 'content': 2.191412386309821e-05, 'timestamp': '2025-09-10 02:55:35.783382', 'step': 7059, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:55:35.837116', 'step': 7059, 'epoch': 3} +{'type': 'loss', 'content': 0.000373539311112836, 'timestamp': '2025-09-10 02:55:35.843242', 'step': 7060, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:55:35.896428', 'step': 7060, 'epoch': 3} +{'type': 'loss', 'content': 0.003901652991771698, 'timestamp': '2025-09-10 02:55:35.898665', 'step': 7061, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:55:35.952786', 'step': 7061, 'epoch': 3} +{'type': 'loss', 'content': 6.882033630972728e-05, 'timestamp': '2025-09-10 02:55:35.960079', 'step': 7062, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-10 02:55:36.013523', 'step': 7062, 'epoch': 3} +{'type': 'loss', 'content': 0.002307756105437875, 'timestamp': '2025-09-10 02:55:36.015673', 'step': 7063, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-10 02:55:36.075695', 'step': 7063, 'epoch': 3} +{'type': 'loss', 'content': 0.0011797071201726794, 'timestamp': '2025-09-10 02:55:36.087167', 'step': 7064, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:55:36.140394', 'step': 7064, 'epoch': 3} +{'type': 'loss', 'content': 0.0024234021548181772, 'timestamp': '2025-09-10 02:55:36.142781', 'step': 7065, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 656], 'flops': 13120079713856.0}, 'timestamp': '2025-09-10 02:55:36.238890', 'step': 7065, 'epoch': 3} +{'type': 'loss', 'content': 0.0002509126497898251, 'timestamp': '2025-09-10 02:55:36.257356', 'step': 7066, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 288], 'flops': 5760035034240.0}, 'timestamp': '2025-09-10 02:55:36.311123', 'step': 7066, 'epoch': 3} +{'type': 'loss', 'content': 0.013593490235507488, 'timestamp': '2025-09-10 02:55:36.319015', 'step': 7067, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 240], 'flops': 4800029206464.0}, 'timestamp': '2025-09-10 02:55:36.372160', 'step': 7067, 'epoch': 3} +{'type': 'loss', 'content': 0.00026906438870355487, 'timestamp': '2025-09-10 02:55:36.378055', 'step': 7068, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-10 02:55:36.431156', 'step': 7068, 'epoch': 3} +{'type': 'loss', 'content': 0.004143257159739733, 'timestamp': '2025-09-10 02:55:36.433349', 'step': 7069, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 528], 'flops': 10560064173120.0}, 'timestamp': '2025-09-10 02:55:36.513985', 'step': 7069, 'epoch': 3} +{'type': 'loss', 'content': 6.296109495451674e-05, 'timestamp': '2025-09-10 02:55:36.528928', 'step': 7070, 'epoch': 3} +{'type': 'flops', 'content': {'type': 'train', 'batch_dim': [3, 224], 'flops': 3360020475552.0}, 'timestamp': '2025-09-10 02:55:36.591269', 'step': 7070, 'epoch': 3} +{'type': 'loss', 'content': 9.570770635036752e-05, 'timestamp': '2025-09-10 02:55:36.593516', 'step': 7071, 'epoch': 3} +{'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 736], 'batch_size': 8, 'flops': 14691612894976}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 480], 'batch_size': 8, 'flops': 9581486694144}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10859018244352}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 624], 'batch_size': 8, 'flops': 12455932682112}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11497784019456}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8623338031488}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 432], 'batch_size': 8, 'flops': 8623338031488}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 544], 'batch_size': 8, 'flops': 10859018244352}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 656], 'batch_size': 8, 'flops': 13094698457216}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 496], 'batch_size': 8, 'flops': 9900869581696}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 416], 'batch_size': 8, 'flops': 8303955143936}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 400], 'batch_size': 8, 'flops': 7984572256384}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 352], 'batch_size': 8, 'flops': 7026423593728}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 288], 'batch_size': 8, 'flops': 5748892043520}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 464], 'batch_size': 8, 'flops': 9262103806592}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 448], 'batch_size': 8, 'flops': 8942720919040}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 560], 'batch_size': 8, 'flops': 11178401131904}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 320], 'batch_size': 8, 'flops': 6387657818624}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 304], 'batch_size': 8, 'flops': 6068274931072}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 576], 'batch_size': 8, 'flops': 11497784019456}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 368], 'batch_size': 8, 'flops': 7345806481280}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 1168], 'batch_size': 8, 'flops': 23314950858880}, {'type': 'perplexity', 'in_batch_dim': [8, 256], 'batch_size': 8, 'flops': 5110126268416}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 240], 'batch_size': 8, 'flops': 4790743380864}, {'type': 'perplexity', 'in_batch_dim': [8, 640], 'batch_size': 8, 'flops': 12775315569664}, {'type': 'perplexity', 'in_batch_dim': [8, 272], 'batch_size': 8, 'flops': 5429509155968}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 224], 'batch_size': 8, 'flops': 4471360493312}, {'type': 'perplexity', 'in_batch_dim': [8, 384], 'batch_size': 8, 'flops': 7665189368832}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [8, 208], 'batch_size': 8, 'flops': 4151977605760}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 336], 'batch_size': 8, 'flops': 6707040706176}, {'type': 'perplexity', 'in_batch_dim': [8, 176], 'batch_size': 8, 'flops': 3513211830656}, {'type': 'perplexity', 'in_batch_dim': [6, 208], 'batch_size': 8, 'flops': 4151977605760}], 'timestamp': '2025-09-10 02:55:53.504071', 'step': 7071, 'epoch': 3} +{'type': 'pplx', 'content': 18515583.117156155, 'timestamp': '2025-09-10 02:55:53.506885', 'step': 7071, 'epoch': 3} +{'type': 'best_pplx', 'content': 18397125.397900093, 'timestamp': '2025-09-10 02:55:53.508374', 'step': 7071, 'epoch': 3} +{'type': 'best_step', 'content': 5292, 'timestamp': '2025-09-10 02:55:53.509782', 'step': 7071, 'epoch': 3} +{'type': 'total_pplx_flops', 'content': 106689854968838400, 'timestamp': '2025-09-10 02:55:53.511030', 'step': 7071, 'epoch': 3} +{'type': 'total_train_flops', 'content': 3.61888601646767e+16, 'timestamp': '2025-09-10 02:55:53.512770', 'step': 7071, 'epoch': 3}