{ "training_args": { "output_dir": "/sc/projects/sci-herbrich/chair/lora-bp/valentin.teutschbein/adapters/nlu_boolq_ff_v1", "overwrite_output_dir": false, "do_train": false, "do_eval": true, "do_predict": false, "eval_strategy": "steps", "prediction_loss_only": false, "per_device_train_batch_size": 4, "per_device_eval_batch_size": 8, "per_gpu_train_batch_size": null, "per_gpu_eval_batch_size": null, "gradient_accumulation_steps": 4, "eval_accumulation_steps": null, "eval_delay": 0, "torch_empty_cache_steps": null, "learning_rate": 2e-05, "weight_decay": 0.0, "adam_beta1": 0.9, "adam_beta2": 0.999, "adam_epsilon": 1e-08, "max_grad_norm": 1.0, "num_train_epochs": 3, "max_steps": -1, "lr_scheduler_type": "linear", "lr_scheduler_kwargs": {}, "warmup_ratio": 0.0, "warmup_steps": 0, "log_level": "passive", "log_level_replica": "warning", "log_on_each_node": true, "logging_dir": "/sc/projects/sci-herbrich/chair/lora-bp/valentin.teutschbein/adapters/nlu_boolq_ff_v1/runs/Sep10_02-15-40_gx08", "logging_strategy": "steps", "logging_first_step": false, "logging_steps": 20, "logging_nan_inf_filter": true, "save_strategy": "epoch", "save_steps": 500, "save_total_limit": null, "save_safetensors": true, "save_on_each_node": false, "save_only_model": false, "restore_callback_states_from_checkpoint": false, "no_cuda": false, "use_cpu": false, "use_mps_device": false, "seed": 42, "data_seed": null, "jit_mode_eval": false, "use_ipex": false, "bf16": false, "fp16": false, "fp16_opt_level": "O1", "half_precision_backend": "auto", "bf16_full_eval": false, "fp16_full_eval": false, "tf32": null, "local_rank": 0, "ddp_backend": null, "tpu_num_cores": null, "tpu_metrics_debug": false, "debug": [], "dataloader_drop_last": false, "eval_steps": 147, "dataloader_num_workers": 0, "dataloader_prefetch_factor": null, "past_index": -1, "run_name": "/sc/projects/sci-herbrich/chair/lora-bp/valentin.teutschbein/adapters/nlu_boolq_ff_v1", "disable_tqdm": false, "remove_unused_columns": true, "label_names": null, "load_best_model_at_end": false, "metric_for_best_model": null, "greater_is_better": null, "ignore_data_skip": false, "fsdp": [], "fsdp_min_num_params": 0, "fsdp_config": { "min_num_params": 0, "xla": false, "xla_fsdp_v2": false, "xla_fsdp_grad_ckpt": false }, "fsdp_transformer_layer_cls_to_wrap": null, "accelerator_config": { "split_batches": false, "dispatch_batches": null, "even_batches": true, "use_seedable_sampler": true, "non_blocking": false, "gradient_accumulation_kwargs": null }, "deepspeed": null, "label_smoothing_factor": 0.0, "optim": "adamw_torch", "optim_args": null, "adafactor": false, "group_by_length": false, "length_column_name": "length", "report_to": [], "ddp_find_unused_parameters": null, "ddp_bucket_cap_mb": null, "ddp_broadcast_buffers": null, "dataloader_pin_memory": true, "dataloader_persistent_workers": false, "skip_memory_metrics": true, "use_legacy_prediction_loop": false, "push_to_hub": false, "resume_from_checkpoint": null, "hub_model_id": null, "hub_strategy": "every_save", "hub_token": "", "hub_private_repo": null, "hub_always_push": false, "gradient_checkpointing": false, "gradient_checkpointing_kwargs": null, "include_inputs_for_metrics": false, "include_for_metrics": [], "eval_do_concat_batches": true, "fp16_backend": "auto", "push_to_hub_model_id": null, "push_to_hub_organization": null, "push_to_hub_token": "", "mp_parameters": "", "auto_find_batch_size": false, "full_determinism": false, "torchdynamo": null, "ray_scope": "last", "ddp_timeout": 1800, "torch_compile": false, "torch_compile_backend": null, "torch_compile_mode": null, "include_tokens_per_second": false, "include_num_input_tokens_seen": false, "neftune_noise_alpha": null, "optim_target_modules": null, "batch_eval_metrics": false, "eval_on_start": false, "use_liger_kernel": false, "eval_use_gather_object": false, "average_tokens_across_devices": false }, "lora_config": null, "flops": { "eval": 105693667713235200, "train": 53674555878669600, "total": 159368223591904800 }, "total_energy": 126.84330000000001, "logs": [ { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 736 ], "batch_size": 8, "flops": 14554433988352 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 480 ], "batch_size": 8, "flops": 9492022189824 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 544 ], "batch_size": 8, "flops": 10757625139456 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 624 ], "batch_size": 8, "flops": 12339628826496 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 576 ], "batch_size": 8, "flops": 11390426614272 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 432 ], "batch_size": 8, "flops": 8542819977600 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 432 ], "batch_size": 8, "flops": 8542819977600 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 128 ], "batch_size": 8, "flops": 2531205966848 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 544 ], "batch_size": 8, "flops": 10757625139456 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 656 ], "batch_size": 8, "flops": 12972430301312 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 128 ], "batch_size": 8, "flops": 2531205966848 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 464 ], "batch_size": 8, "flops": 9175621452416 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 560 ], "batch_size": 8, "flops": 11074025876864 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 576 ], "batch_size": 8, "flops": 11390426614272 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 1168 ], "batch_size": 8, "flops": 23097253898368 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 640 ], "batch_size": 8, "flops": 12656029563904 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 6, 208 ], "batch_size": 8, "flops": 4113209653888 } ], "timestamp": "2025-09-10 02:16:01.984520", "step": 0, "epoch": 0 }, { "type": "pplx", "content": 54140675.446864516, "timestamp": "2025-09-10 02:16:01.988837", "step": 0, "epoch": 0 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:16:02.062821", "step": 0, "epoch": 1 }, { "type": "loss", "content": 0.6008338332176208, "timestamp": "2025-09-10 02:16:02.064796", "step": 1, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:16:02.110853", "step": 1, "epoch": 1 }, { "type": "loss", "content": 0.5395371317863464, "timestamp": "2025-09-10 02:16:02.115148", "step": 2, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:16:02.146065", "step": 2, "epoch": 1 }, { "type": "loss", "content": 0.547315239906311, "timestamp": "2025-09-10 02:16:02.152973", "step": 3, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:16:02.197495", "step": 3, "epoch": 1 }, { "type": "loss", "content": 0.6588919758796692, "timestamp": "2025-09-10 02:16:02.248579", "step": 4, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:16:02.280581", "step": 4, "epoch": 1 }, { "type": "loss", "content": 0.2344198077917099, "timestamp": "2025-09-10 02:16:02.284584", "step": 5, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:16:02.335992", "step": 5, "epoch": 1 }, { "type": "loss", "content": 0.18304279446601868, "timestamp": "2025-09-10 02:16:02.338088", "step": 6, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:16:02.368461", "step": 6, "epoch": 1 }, { "type": "loss", "content": 0.1824495494365692, "timestamp": "2025-09-10 02:16:02.375219", "step": 7, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:16:02.418461", "step": 7, "epoch": 1 }, { "type": "loss", "content": 0.21861636638641357, "timestamp": "2025-09-10 02:16:02.443617", "step": 8, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:16:02.500477", "step": 8, "epoch": 1 }, { "type": "loss", "content": 0.0901818498969078, "timestamp": "2025-09-10 02:16:02.503792", "step": 9, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:16:02.534851", "step": 9, "epoch": 1 }, { "type": "loss", "content": 0.06925918161869049, "timestamp": "2025-09-10 02:16:02.542382", "step": 10, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:16:02.581087", "step": 10, "epoch": 1 }, { "type": "loss", "content": 0.06179536134004593, "timestamp": "2025-09-10 02:16:02.587234", "step": 11, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:16:02.625224", "step": 11, "epoch": 1 }, { "type": "loss", "content": 0.060144226998090744, "timestamp": "2025-09-10 02:16:02.652677", "step": 12, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:16:02.700676", "step": 12, "epoch": 1 }, { "type": "loss", "content": 0.04763566702604294, "timestamp": "2025-09-10 02:16:02.704489", "step": 13, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:16:02.757375", "step": 13, "epoch": 1 }, { "type": "loss", "content": 0.04455741122364998, "timestamp": "2025-09-10 02:16:02.765877", "step": 14, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:16:02.807895", "step": 14, "epoch": 1 }, { "type": "loss", "content": 0.03932064399123192, "timestamp": "2025-09-10 02:16:02.812005", "step": 15, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:16:02.847941", "step": 15, "epoch": 1 }, { "type": "loss", "content": 0.02747354283928871, "timestamp": "2025-09-10 02:16:02.874022", "step": 16, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:16:02.926383", "step": 16, "epoch": 1 }, { "type": "loss", "content": 0.02676134742796421, "timestamp": "2025-09-10 02:16:02.930699", "step": 17, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:16:02.967263", "step": 17, "epoch": 1 }, { "type": "loss", "content": 0.036269430071115494, "timestamp": "2025-09-10 02:16:02.971943", "step": 18, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:16:03.013820", "step": 18, "epoch": 1 }, { "type": "loss", "content": 0.025049904361367226, "timestamp": "2025-09-10 02:16:03.023342", "step": 19, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:16:03.064093", "step": 19, "epoch": 1 }, { "type": "loss", "content": 0.038132019340991974, "timestamp": "2025-09-10 02:16:03.092030", "step": 20, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:16:03.131090", "step": 20, "epoch": 1 }, { "type": "loss", "content": 0.02351507358253002, "timestamp": "2025-09-10 02:16:03.138086", "step": 21, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:16:03.174188", "step": 21, "epoch": 1 }, { "type": "loss", "content": 0.016791896894574165, "timestamp": "2025-09-10 02:16:03.177900", "step": 22, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:16:03.210824", "step": 22, "epoch": 1 }, { "type": "loss", "content": 0.028946993872523308, "timestamp": "2025-09-10 02:16:03.217309", "step": 23, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:16:03.248257", "step": 23, "epoch": 1 }, { "type": "loss", "content": 0.030578048899769783, "timestamp": "2025-09-10 02:16:03.276442", "step": 24, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:16:03.307551", "step": 24, "epoch": 1 }, { "type": "loss", "content": 0.0187423974275589, "timestamp": "2025-09-10 02:16:03.311793", "step": 25, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:16:03.343360", "step": 25, "epoch": 1 }, { "type": "loss", "content": 0.01975913718342781, "timestamp": "2025-09-10 02:16:03.350089", "step": 26, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:16:03.380780", "step": 26, "epoch": 1 }, { "type": "loss", "content": 0.0228941161185503, "timestamp": "2025-09-10 02:16:03.387364", "step": 27, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:16:03.436367", "step": 27, "epoch": 1 }, { "type": "loss", "content": 0.02786724641919136, "timestamp": "2025-09-10 02:16:03.460600", "step": 28, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:16:03.493390", "step": 28, "epoch": 1 }, { "type": "loss", "content": 0.045785628259181976, "timestamp": "2025-09-10 02:16:03.498028", "step": 29, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:16:03.533826", "step": 29, "epoch": 1 }, { "type": "loss", "content": 0.004484932404011488, "timestamp": "2025-09-10 02:16:03.546978", "step": 30, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:16:03.578360", "step": 30, "epoch": 1 }, { "type": "loss", "content": 0.05362967774271965, "timestamp": "2025-09-10 02:16:03.584874", "step": 31, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:16:03.616459", "step": 31, "epoch": 1 }, { "type": "loss", "content": 0.06305649876594543, "timestamp": "2025-09-10 02:16:03.644134", "step": 32, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:16:03.677074", "step": 32, "epoch": 1 }, { "type": "loss", "content": 0.015654366463422775, "timestamp": "2025-09-10 02:16:03.679219", "step": 33, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:16:03.710464", "step": 33, "epoch": 1 }, { "type": "loss", "content": 0.02429381012916565, "timestamp": "2025-09-10 02:16:03.717986", "step": 34, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:16:03.749170", "step": 34, "epoch": 1 }, { "type": "loss", "content": 0.04219824820756912, "timestamp": "2025-09-10 02:16:03.758869", "step": 35, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:16:03.789600", "step": 35, "epoch": 1 }, { "type": "loss", "content": 0.05330771207809448, "timestamp": "2025-09-10 02:16:03.813089", "step": 36, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:16:03.843661", "step": 36, "epoch": 1 }, { "type": "loss", "content": 0.013570256531238556, "timestamp": "2025-09-10 02:16:03.848084", "step": 37, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 144 ], "flops": 4271696270016 }, "timestamp": "2025-09-10 02:16:03.893622", "step": 37, "epoch": 1 }, { "type": "loss", "content": 0.01802876405417919, "timestamp": "2025-09-10 02:16:03.895915", "step": 38, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:16:03.928023", "step": 38, "epoch": 1 }, { "type": "loss", "content": 0.02937530353665352, "timestamp": "2025-09-10 02:16:03.932421", "step": 39, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:16:03.962845", "step": 39, "epoch": 1 }, { "type": "loss", "content": 0.03161190077662468, "timestamp": "2025-09-10 02:16:03.990473", "step": 40, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:16:04.022146", "step": 40, "epoch": 1 }, { "type": "loss", "content": 0.03022809512913227, "timestamp": "2025-09-10 02:16:04.026833", "step": 41, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:16:04.058668", "step": 41, "epoch": 1 }, { "type": "loss", "content": 0.027601536363363266, "timestamp": "2025-09-10 02:16:04.062673", "step": 42, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:16:04.097743", "step": 42, "epoch": 1 }, { "type": "loss", "content": 0.021324804052710533, "timestamp": "2025-09-10 02:16:04.111039", "step": 43, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:16:04.143441", "step": 43, "epoch": 1 }, { "type": "loss", "content": 0.020267976447939873, "timestamp": "2025-09-10 02:16:04.171221", "step": 44, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:16:04.202635", "step": 44, "epoch": 1 }, { "type": "loss", "content": 0.02561989612877369, "timestamp": "2025-09-10 02:16:04.204754", "step": 45, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:16:04.235907", "step": 45, "epoch": 1 }, { "type": "loss", "content": 0.025528931990265846, "timestamp": "2025-09-10 02:16:04.242862", "step": 46, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:16:04.273959", "step": 46, "epoch": 1 }, { "type": "loss", "content": 0.021888835355639458, "timestamp": "2025-09-10 02:16:04.283691", "step": 47, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:16:04.315341", "step": 47, "epoch": 1 }, { "type": "loss", "content": 0.027732163667678833, "timestamp": "2025-09-10 02:16:04.342901", "step": 48, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:16:04.379523", "step": 48, "epoch": 1 }, { "type": "loss", "content": 0.03233006224036217, "timestamp": "2025-09-10 02:16:04.383378", "step": 49, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:16:04.414652", "step": 49, "epoch": 1 }, { "type": "loss", "content": 0.021881645545363426, "timestamp": "2025-09-10 02:16:04.425174", "step": 50, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:16:04.455872", "step": 50, "epoch": 1 }, { "type": "loss", "content": 0.03416941687464714, "timestamp": "2025-09-10 02:16:04.458081", "step": 51, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 928 ], "flops": 27527278844800 }, "timestamp": "2025-09-10 02:16:04.632897", "step": 51, "epoch": 1 }, { "type": "loss", "content": 0.027697524055838585, "timestamp": "2025-09-10 02:16:04.656929", "step": 52, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:16:04.694123", "step": 52, "epoch": 1 }, { "type": "loss", "content": 0.032309334725141525, "timestamp": "2025-09-10 02:16:04.696565", "step": 53, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:16:04.732462", "step": 53, "epoch": 1 }, { "type": "loss", "content": 0.014240605756640434, "timestamp": "2025-09-10 02:16:04.739274", "step": 54, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:16:04.773962", "step": 54, "epoch": 1 }, { "type": "loss", "content": 0.03165564686059952, "timestamp": "2025-09-10 02:16:04.779367", "step": 55, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-10 02:16:04.829001", "step": 55, "epoch": 1 }, { "type": "loss", "content": 0.028052538633346558, "timestamp": "2025-09-10 02:16:04.861010", "step": 56, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 144 ], "flops": 4271696270016 }, "timestamp": "2025-09-10 02:16:04.896301", "step": 56, "epoch": 1 }, { "type": "loss", "content": 0.02410052716732025, "timestamp": "2025-09-10 02:16:04.898695", "step": 57, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:16:04.932687", "step": 57, "epoch": 1 }, { "type": "loss", "content": 0.028389716520905495, "timestamp": "2025-09-10 02:16:04.938045", "step": 58, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:16:04.970808", "step": 58, "epoch": 1 }, { "type": "loss", "content": 0.02223231829702854, "timestamp": "2025-09-10 02:16:04.979603", "step": 59, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:16:05.012882", "step": 59, "epoch": 1 }, { "type": "loss", "content": 0.025019675493240356, "timestamp": "2025-09-10 02:16:05.040525", "step": 60, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:16:05.075959", "step": 60, "epoch": 1 }, { "type": "loss", "content": 0.02373148687183857, "timestamp": "2025-09-10 02:16:05.083328", "step": 61, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:16:05.118418", "step": 61, "epoch": 1 }, { "type": "loss", "content": 0.0228324793279171, "timestamp": "2025-09-10 02:16:05.123087", "step": 62, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:16:05.159154", "step": 62, "epoch": 1 }, { "type": "loss", "content": 0.02589366026222706, "timestamp": "2025-09-10 02:16:05.165199", "step": 63, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:16:05.196884", "step": 63, "epoch": 1 }, { "type": "loss", "content": 0.02020171843469143, "timestamp": "2025-09-10 02:16:05.224420", "step": 64, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-10 02:16:05.262460", "step": 64, "epoch": 1 }, { "type": "loss", "content": 0.019750652834773064, "timestamp": "2025-09-10 02:16:05.277816", "step": 65, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:16:05.309484", "step": 65, "epoch": 1 }, { "type": "loss", "content": 0.022475482895970345, "timestamp": "2025-09-10 02:16:05.316076", "step": 66, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:16:05.349346", "step": 66, "epoch": 1 }, { "type": "loss", "content": 0.031203726306557655, "timestamp": "2025-09-10 02:16:05.351899", "step": 67, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:16:05.383549", "step": 67, "epoch": 1 }, { "type": "loss", "content": 0.026068750768899918, "timestamp": "2025-09-10 02:16:05.408325", "step": 68, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:16:05.441620", "step": 68, "epoch": 1 }, { "type": "loss", "content": 0.020249370485544205, "timestamp": "2025-09-10 02:16:05.445702", "step": 69, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-10 02:16:05.485622", "step": 69, "epoch": 1 }, { "type": "loss", "content": 0.02251577563583851, "timestamp": "2025-09-10 02:16:05.501212", "step": 70, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:16:05.533112", "step": 70, "epoch": 1 }, { "type": "loss", "content": 0.02423388697206974, "timestamp": "2025-09-10 02:16:05.540249", "step": 71, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:16:05.580456", "step": 71, "epoch": 1 }, { "type": "loss", "content": 0.016923097893595695, "timestamp": "2025-09-10 02:16:05.605965", "step": 72, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:16:05.641491", "step": 72, "epoch": 1 }, { "type": "loss", "content": 0.029709434136748314, "timestamp": "2025-09-10 02:16:05.644029", "step": 73, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:16:05.677693", "step": 73, "epoch": 1 }, { "type": "loss", "content": 0.013269062153995037, "timestamp": "2025-09-10 02:16:05.683412", "step": 74, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:16:05.716854", "step": 74, "epoch": 1 }, { "type": "loss", "content": 0.03226935863494873, "timestamp": "2025-09-10 02:16:05.725697", "step": 75, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-10 02:16:05.765814", "step": 75, "epoch": 1 }, { "type": "loss", "content": 0.020103048533201218, "timestamp": "2025-09-10 02:16:05.802277", "step": 76, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:16:05.833640", "step": 76, "epoch": 1 }, { "type": "loss", "content": 0.018879475072026253, "timestamp": "2025-09-10 02:16:05.837910", "step": 77, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:16:05.869054", "step": 77, "epoch": 1 }, { "type": "loss", "content": 0.038980383425951004, "timestamp": "2025-09-10 02:16:05.872959", "step": 78, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:16:05.906349", "step": 78, "epoch": 1 }, { "type": "loss", "content": 0.029591679573059082, "timestamp": "2025-09-10 02:16:05.919579", "step": 79, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-10 02:16:05.958106", "step": 79, "epoch": 1 }, { "type": "loss", "content": 0.014890284277498722, "timestamp": "2025-09-10 02:16:05.994447", "step": 80, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:16:06.026426", "step": 80, "epoch": 1 }, { "type": "loss", "content": 0.009303289465606213, "timestamp": "2025-09-10 02:16:06.028566", "step": 81, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:16:06.059374", "step": 81, "epoch": 1 }, { "type": "loss", "content": 0.02382597140967846, "timestamp": "2025-09-10 02:16:06.071305", "step": 82, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:16:06.103494", "step": 82, "epoch": 1 }, { "type": "loss", "content": 0.009986629709601402, "timestamp": "2025-09-10 02:16:06.113405", "step": 83, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:16:06.146437", "step": 83, "epoch": 1 }, { "type": "loss", "content": 0.022146521136164665, "timestamp": "2025-09-10 02:16:06.171621", "step": 84, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:16:06.203414", "step": 84, "epoch": 1 }, { "type": "loss", "content": 0.03081861138343811, "timestamp": "2025-09-10 02:16:06.207612", "step": 85, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:16:06.241382", "step": 85, "epoch": 1 }, { "type": "loss", "content": 0.042918942868709564, "timestamp": "2025-09-10 02:16:06.254938", "step": 86, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:16:06.286234", "step": 86, "epoch": 1 }, { "type": "loss", "content": 0.01463954895734787, "timestamp": "2025-09-10 02:16:06.290359", "step": 87, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:16:06.321404", "step": 87, "epoch": 1 }, { "type": "loss", "content": 0.025244222953915596, "timestamp": "2025-09-10 02:16:06.349501", "step": 88, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 496 ], "flops": 14712978242368 }, "timestamp": "2025-09-10 02:16:06.393581", "step": 88, "epoch": 1 }, { "type": "loss", "content": 0.018959475681185722, "timestamp": "2025-09-10 02:16:06.410750", "step": 89, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:16:06.442741", "step": 89, "epoch": 1 }, { "type": "loss", "content": 0.019290607422590256, "timestamp": "2025-09-10 02:16:06.449555", "step": 90, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:16:06.480195", "step": 90, "epoch": 1 }, { "type": "loss", "content": 0.037016745656728745, "timestamp": "2025-09-10 02:16:06.486836", "step": 91, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:16:06.520237", "step": 91, "epoch": 1 }, { "type": "loss", "content": 0.01576540246605873, "timestamp": "2025-09-10 02:16:06.554391", "step": 92, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:16:06.586174", "step": 92, "epoch": 1 }, { "type": "loss", "content": 0.029999637976288795, "timestamp": "2025-09-10 02:16:06.588193", "step": 93, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:16:06.618830", "step": 93, "epoch": 1 }, { "type": "loss", "content": 0.007616397459059954, "timestamp": "2025-09-10 02:16:06.625809", "step": 94, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 480 ], "flops": 14238374516352 }, "timestamp": "2025-09-10 02:16:06.672877", "step": 94, "epoch": 1 }, { "type": "loss", "content": 0.041450273245573044, "timestamp": "2025-09-10 02:16:06.687746", "step": 95, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:16:06.719554", "step": 95, "epoch": 1 }, { "type": "loss", "content": 0.028501790016889572, "timestamp": "2025-09-10 02:16:06.746921", "step": 96, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:16:06.780163", "step": 96, "epoch": 1 }, { "type": "loss", "content": 0.031911808997392654, "timestamp": "2025-09-10 02:16:06.785472", "step": 97, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:16:06.816090", "step": 97, "epoch": 1 }, { "type": "loss", "content": 0.010332711972296238, "timestamp": "2025-09-10 02:16:06.820432", "step": 98, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:16:06.850962", "step": 98, "epoch": 1 }, { "type": "loss", "content": 0.014578322879970074, "timestamp": "2025-09-10 02:16:06.855363", "step": 99, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:16:06.886347", "step": 99, "epoch": 1 }, { "type": "loss", "content": 0.030410753563046455, "timestamp": "2025-09-10 02:16:06.914188", "step": 100, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:16:06.945693", "step": 100, "epoch": 1 }, { "type": "loss", "content": 0.019498659297823906, "timestamp": "2025-09-10 02:16:06.948062", "step": 101, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:16:06.981389", "step": 101, "epoch": 1 }, { "type": "loss", "content": 0.02664143405854702, "timestamp": "2025-09-10 02:16:06.988136", "step": 102, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:16:07.018987", "step": 102, "epoch": 1 }, { "type": "loss", "content": 0.0366455540060997, "timestamp": "2025-09-10 02:16:07.026481", "step": 103, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:16:07.057910", "step": 103, "epoch": 1 }, { "type": "loss", "content": 0.021227413788437843, "timestamp": "2025-09-10 02:16:07.090878", "step": 104, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:16:07.121568", "step": 104, "epoch": 1 }, { "type": "loss", "content": 0.016079608350992203, "timestamp": "2025-09-10 02:16:07.130132", "step": 105, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:16:07.161498", "step": 105, "epoch": 1 }, { "type": "loss", "content": 0.01837443746626377, "timestamp": "2025-09-10 02:16:07.168957", "step": 106, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:16:07.199456", "step": 106, "epoch": 1 }, { "type": "loss", "content": 0.029402051120996475, "timestamp": "2025-09-10 02:16:07.206169", "step": 107, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:16:07.237182", "step": 107, "epoch": 1 }, { "type": "loss", "content": 0.021664408966898918, "timestamp": "2025-09-10 02:16:07.265754", "step": 108, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:16:07.295601", "step": 108, "epoch": 1 }, { "type": "loss", "content": 0.01921442337334156, "timestamp": "2025-09-10 02:16:07.303172", "step": 109, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:16:07.333506", "step": 109, "epoch": 1 }, { "type": "loss", "content": 0.016907794401049614, "timestamp": "2025-09-10 02:16:07.340549", "step": 110, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:16:07.372342", "step": 110, "epoch": 1 }, { "type": "loss", "content": 0.018889309838414192, "timestamp": "2025-09-10 02:16:07.384880", "step": 111, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:16:07.416259", "step": 111, "epoch": 1 }, { "type": "loss", "content": 0.014976476319134235, "timestamp": "2025-09-10 02:16:07.444498", "step": 112, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:16:07.476485", "step": 112, "epoch": 1 }, { "type": "loss", "content": 0.020062191411852837, "timestamp": "2025-09-10 02:16:07.481612", "step": 113, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:16:07.511720", "step": 113, "epoch": 1 }, { "type": "loss", "content": 0.023766087368130684, "timestamp": "2025-09-10 02:16:07.518727", "step": 114, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:16:07.549994", "step": 114, "epoch": 1 }, { "type": "loss", "content": 0.021067747846245766, "timestamp": "2025-09-10 02:16:07.557452", "step": 115, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:16:07.589333", "step": 115, "epoch": 1 }, { "type": "loss", "content": 0.020303750410676003, "timestamp": "2025-09-10 02:16:07.617297", "step": 116, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:16:07.649366", "step": 116, "epoch": 1 }, { "type": "loss", "content": 0.02873547188937664, "timestamp": "2025-09-10 02:16:07.661942", "step": 117, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:16:07.692247", "step": 117, "epoch": 1 }, { "type": "loss", "content": 0.021697448566555977, "timestamp": "2025-09-10 02:16:07.699045", "step": 118, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:16:07.730118", "step": 118, "epoch": 1 }, { "type": "loss", "content": 0.02336110547184944, "timestamp": "2025-09-10 02:16:07.742698", "step": 119, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:16:07.777761", "step": 119, "epoch": 1 }, { "type": "loss", "content": 0.02601124718785286, "timestamp": "2025-09-10 02:16:07.810365", "step": 120, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:16:07.841544", "step": 120, "epoch": 1 }, { "type": "loss", "content": 0.019523756578564644, "timestamp": "2025-09-10 02:16:07.851022", "step": 121, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:16:07.881927", "step": 121, "epoch": 1 }, { "type": "loss", "content": 0.019188281148672104, "timestamp": "2025-09-10 02:16:07.891951", "step": 122, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:16:07.922749", "step": 122, "epoch": 1 }, { "type": "loss", "content": 0.020111748948693275, "timestamp": "2025-09-10 02:16:07.929577", "step": 123, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:16:07.961525", "step": 123, "epoch": 1 }, { "type": "loss", "content": 0.02041424997150898, "timestamp": "2025-09-10 02:16:07.993208", "step": 124, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:16:08.023902", "step": 124, "epoch": 1 }, { "type": "loss", "content": 0.03184106573462486, "timestamp": "2025-09-10 02:16:08.026634", "step": 125, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:16:08.056664", "step": 125, "epoch": 1 }, { "type": "loss", "content": 0.02083834446966648, "timestamp": "2025-09-10 02:16:08.066958", "step": 126, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-10 02:16:08.102647", "step": 126, "epoch": 1 }, { "type": "loss", "content": 0.025340793654322624, "timestamp": "2025-09-10 02:16:08.116615", "step": 127, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 144 ], "flops": 4271696270016 }, "timestamp": "2025-09-10 02:16:08.148390", "step": 127, "epoch": 1 }, { "type": "loss", "content": 0.021747667342424393, "timestamp": "2025-09-10 02:16:08.171962", "step": 128, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:16:08.203715", "step": 128, "epoch": 1 }, { "type": "loss", "content": 0.025707753375172615, "timestamp": "2025-09-10 02:16:08.213867", "step": 129, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 144 ], "flops": 4271696270016 }, "timestamp": "2025-09-10 02:16:08.245075", "step": 129, "epoch": 1 }, { "type": "loss", "content": 0.01971745304763317, "timestamp": "2025-09-10 02:16:08.247284", "step": 130, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:16:08.279079", "step": 130, "epoch": 1 }, { "type": "loss", "content": 0.02148953266441822, "timestamp": "2025-09-10 02:16:08.286537", "step": 131, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:16:08.317714", "step": 131, "epoch": 1 }, { "type": "loss", "content": 0.02072383277118206, "timestamp": "2025-09-10 02:16:08.342913", "step": 132, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:16:08.374235", "step": 132, "epoch": 1 }, { "type": "loss", "content": 0.021854082122445107, "timestamp": "2025-09-10 02:16:08.379220", "step": 133, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:16:08.411433", "step": 133, "epoch": 1 }, { "type": "loss", "content": 0.016318751499056816, "timestamp": "2025-09-10 02:16:08.419281", "step": 134, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:16:08.449943", "step": 134, "epoch": 1 }, { "type": "loss", "content": 0.02498047612607479, "timestamp": "2025-09-10 02:16:08.462062", "step": 135, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:16:08.493203", "step": 135, "epoch": 1 }, { "type": "loss", "content": 0.02256803587079048, "timestamp": "2025-09-10 02:16:08.521197", "step": 136, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:16:08.551083", "step": 136, "epoch": 1 }, { "type": "loss", "content": 0.017909932881593704, "timestamp": "2025-09-10 02:16:08.560671", "step": 137, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:16:08.591595", "step": 137, "epoch": 1 }, { "type": "loss", "content": 0.019151031970977783, "timestamp": "2025-09-10 02:16:08.598213", "step": 138, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:16:08.627700", "step": 138, "epoch": 1 }, { "type": "loss", "content": 0.01293948758393526, "timestamp": "2025-09-10 02:16:08.634698", "step": 139, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:16:08.664634", "step": 139, "epoch": 1 }, { "type": "loss", "content": 0.014859228394925594, "timestamp": "2025-09-10 02:16:08.693246", "step": 140, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:16:08.725823", "step": 140, "epoch": 1 }, { "type": "loss", "content": 0.030040746554732323, "timestamp": "2025-09-10 02:16:08.738785", "step": 141, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:16:08.769782", "step": 141, "epoch": 1 }, { "type": "loss", "content": 0.015265722759068012, "timestamp": "2025-09-10 02:16:08.777294", "step": 142, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:16:08.807493", "step": 142, "epoch": 1 }, { "type": "loss", "content": 0.02200271561741829, "timestamp": "2025-09-10 02:16:08.814902", "step": 143, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:16:08.845397", "step": 143, "epoch": 1 }, { "type": "loss", "content": 0.023353280499577522, "timestamp": "2025-09-10 02:16:08.873964", "step": 144, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:16:08.904724", "step": 144, "epoch": 1 }, { "type": "loss", "content": 0.01525102648884058, "timestamp": "2025-09-10 02:16:08.912397", "step": 145, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:16:08.942660", "step": 145, "epoch": 1 }, { "type": "loss", "content": 0.02498231828212738, "timestamp": "2025-09-10 02:16:08.949473", "step": 146, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:16:08.979736", "step": 146, "epoch": 1 }, { "type": "loss", "content": 0.0227807704359293, "timestamp": "2025-09-10 02:16:08.989868", "step": 147, "epoch": 1 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 736 ], "batch_size": 8, "flops": 14554433988352 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 480 ], "batch_size": 8, "flops": 9492022189824 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 544 ], "batch_size": 8, "flops": 10757625139456 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 624 ], "batch_size": 8, "flops": 12339628826496 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 576 ], "batch_size": 8, "flops": 11390426614272 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 432 ], "batch_size": 8, "flops": 8542819977600 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 432 ], "batch_size": 8, "flops": 8542819977600 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 128 ], "batch_size": 8, "flops": 2531205966848 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 544 ], "batch_size": 8, "flops": 10757625139456 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 656 ], "batch_size": 8, "flops": 12972430301312 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 128 ], "batch_size": 8, "flops": 2531205966848 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 464 ], "batch_size": 8, "flops": 9175621452416 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 560 ], "batch_size": 8, "flops": 11074025876864 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 576 ], "batch_size": 8, "flops": 11390426614272 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 1168 ], "batch_size": 8, "flops": 23097253898368 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 640 ], "batch_size": 8, "flops": 12656029563904 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 6, 208 ], "batch_size": 8, "flops": 4113209653888 } ], "timestamp": "2025-09-10 02:16:20.735353", "step": 147, "epoch": 1 }, { "type": "pplx", "content": 12191892.104022551, "timestamp": "2025-09-10 02:16:20.742172", "step": 147, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:16:20.777302", "step": 147, "epoch": 1 }, { "type": "loss", "content": 0.01853315904736519, "timestamp": "2025-09-10 02:16:20.804672", "step": 148, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:16:20.847582", "step": 148, "epoch": 1 }, { "type": "loss", "content": 0.01748022995889187, "timestamp": "2025-09-10 02:16:20.852109", "step": 149, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:16:20.892051", "step": 149, "epoch": 1 }, { "type": "loss", "content": 0.016961688175797462, "timestamp": "2025-09-10 02:16:20.898764", "step": 150, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-10 02:16:20.955270", "step": 150, "epoch": 1 }, { "type": "loss", "content": 0.03245147690176964, "timestamp": "2025-09-10 02:16:20.968525", "step": 151, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:16:21.030461", "step": 151, "epoch": 1 }, { "type": "loss", "content": 0.023213069885969162, "timestamp": "2025-09-10 02:16:21.061308", "step": 152, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 144 ], "flops": 4271696270016 }, "timestamp": "2025-09-10 02:16:21.113790", "step": 152, "epoch": 1 }, { "type": "loss", "content": 0.02709483541548252, "timestamp": "2025-09-10 02:16:21.117104", "step": 153, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:16:21.150904", "step": 153, "epoch": 1 }, { "type": "loss", "content": 0.02957731857895851, "timestamp": "2025-09-10 02:16:21.157774", "step": 154, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:16:21.192385", "step": 154, "epoch": 1 }, { "type": "loss", "content": 0.0102442791685462, "timestamp": "2025-09-10 02:16:21.204856", "step": 155, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:16:21.237494", "step": 155, "epoch": 1 }, { "type": "loss", "content": 0.026265621185302734, "timestamp": "2025-09-10 02:16:21.262502", "step": 156, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:16:21.295340", "step": 156, "epoch": 1 }, { "type": "loss", "content": 0.022335294634103775, "timestamp": "2025-09-10 02:16:21.298538", "step": 157, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:16:21.332105", "step": 157, "epoch": 1 }, { "type": "loss", "content": 0.011588959954679012, "timestamp": "2025-09-10 02:16:21.344331", "step": 158, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:16:21.377620", "step": 158, "epoch": 1 }, { "type": "loss", "content": 0.019390691071748734, "timestamp": "2025-09-10 02:16:21.390111", "step": 159, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:16:21.421280", "step": 159, "epoch": 1 }, { "type": "loss", "content": 0.02968760021030903, "timestamp": "2025-09-10 02:16:21.449406", "step": 160, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:16:21.481678", "step": 160, "epoch": 1 }, { "type": "loss", "content": 0.03552708774805069, "timestamp": "2025-09-10 02:16:21.484727", "step": 161, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:16:21.517175", "step": 161, "epoch": 1 }, { "type": "loss", "content": 0.014869497157633305, "timestamp": "2025-09-10 02:16:21.522775", "step": 162, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:16:21.556750", "step": 162, "epoch": 1 }, { "type": "loss", "content": 0.022128138691186905, "timestamp": "2025-09-10 02:16:21.563452", "step": 163, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:16:21.595358", "step": 163, "epoch": 1 }, { "type": "loss", "content": 0.014087623916566372, "timestamp": "2025-09-10 02:16:21.623091", "step": 164, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:16:21.655243", "step": 164, "epoch": 1 }, { "type": "loss", "content": 0.010876579210162163, "timestamp": "2025-09-10 02:16:21.659393", "step": 165, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:16:21.692113", "step": 165, "epoch": 1 }, { "type": "loss", "content": 0.02489648386836052, "timestamp": "2025-09-10 02:16:21.701867", "step": 166, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:16:21.734164", "step": 166, "epoch": 1 }, { "type": "loss", "content": 0.02895858697593212, "timestamp": "2025-09-10 02:16:21.745496", "step": 167, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:16:21.780855", "step": 167, "epoch": 1 }, { "type": "loss", "content": 0.02555564045906067, "timestamp": "2025-09-10 02:16:21.815551", "step": 168, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:16:21.847095", "step": 168, "epoch": 1 }, { "type": "loss", "content": 0.02748963236808777, "timestamp": "2025-09-10 02:16:21.859757", "step": 169, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:16:21.891258", "step": 169, "epoch": 1 }, { "type": "loss", "content": 0.013958572410047054, "timestamp": "2025-09-10 02:16:21.894827", "step": 170, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:16:21.925187", "step": 170, "epoch": 1 }, { "type": "loss", "content": 0.02482200227677822, "timestamp": "2025-09-10 02:16:21.929660", "step": 171, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:16:21.960629", "step": 171, "epoch": 1 }, { "type": "loss", "content": 0.012857136316597462, "timestamp": "2025-09-10 02:16:21.994081", "step": 172, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:16:22.026787", "step": 172, "epoch": 1 }, { "type": "loss", "content": 0.013918432407081127, "timestamp": "2025-09-10 02:16:22.039823", "step": 173, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:16:22.069812", "step": 173, "epoch": 1 }, { "type": "loss", "content": 0.020305844023823738, "timestamp": "2025-09-10 02:16:22.073912", "step": 174, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:16:22.106095", "step": 174, "epoch": 1 }, { "type": "loss", "content": 0.01923571154475212, "timestamp": "2025-09-10 02:16:22.112980", "step": 175, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:16:22.145283", "step": 175, "epoch": 1 }, { "type": "loss", "content": 0.02769598178565502, "timestamp": "2025-09-10 02:16:22.177975", "step": 176, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:16:22.210470", "step": 176, "epoch": 1 }, { "type": "loss", "content": 0.01644458808004856, "timestamp": "2025-09-10 02:16:22.214646", "step": 177, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 608 ], "flops": 18035204324480 }, "timestamp": "2025-09-10 02:16:22.265692", "step": 177, "epoch": 1 }, { "type": "loss", "content": 0.02010306902229786, "timestamp": "2025-09-10 02:16:22.287206", "step": 178, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:16:22.318980", "step": 178, "epoch": 1 }, { "type": "loss", "content": 0.020176881924271584, "timestamp": "2025-09-10 02:16:22.331133", "step": 179, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:16:22.361522", "step": 179, "epoch": 1 }, { "type": "loss", "content": 0.016681145876646042, "timestamp": "2025-09-10 02:16:22.386733", "step": 180, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:16:22.417832", "step": 180, "epoch": 1 }, { "type": "loss", "content": 0.021172260865569115, "timestamp": "2025-09-10 02:16:22.420092", "step": 181, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:16:22.453254", "step": 181, "epoch": 1 }, { "type": "loss", "content": 0.028826581314206123, "timestamp": "2025-09-10 02:16:22.466939", "step": 182, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-10 02:16:22.501100", "step": 182, "epoch": 1 }, { "type": "loss", "content": 0.023097632452845573, "timestamp": "2025-09-10 02:16:22.515058", "step": 183, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:16:22.545590", "step": 183, "epoch": 1 }, { "type": "loss", "content": 0.027354659512639046, "timestamp": "2025-09-10 02:16:22.570952", "step": 184, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:16:22.600953", "step": 184, "epoch": 1 }, { "type": "loss", "content": 0.022371714934706688, "timestamp": "2025-09-10 02:16:22.605532", "step": 185, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:16:22.637703", "step": 185, "epoch": 1 }, { "type": "loss", "content": 0.02118653617799282, "timestamp": "2025-09-10 02:16:22.644663", "step": 186, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:16:22.675209", "step": 186, "epoch": 1 }, { "type": "loss", "content": 0.005522268824279308, "timestamp": "2025-09-10 02:16:22.679864", "step": 187, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:16:22.710239", "step": 187, "epoch": 1 }, { "type": "loss", "content": 0.017090152949094772, "timestamp": "2025-09-10 02:16:22.741196", "step": 188, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:16:22.770746", "step": 188, "epoch": 1 }, { "type": "loss", "content": 0.02689528279006481, "timestamp": "2025-09-10 02:16:22.775360", "step": 189, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:16:22.807716", "step": 189, "epoch": 1 }, { "type": "loss", "content": 0.017292974516749382, "timestamp": "2025-09-10 02:16:22.815250", "step": 190, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:16:22.845775", "step": 190, "epoch": 1 }, { "type": "loss", "content": 0.019936595112085342, "timestamp": "2025-09-10 02:16:22.852588", "step": 191, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-10 02:16:22.886413", "step": 191, "epoch": 1 }, { "type": "loss", "content": 0.01834060624241829, "timestamp": "2025-09-10 02:16:22.921163", "step": 192, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:16:22.952122", "step": 192, "epoch": 1 }, { "type": "loss", "content": 0.009056499227881432, "timestamp": "2025-09-10 02:16:22.957463", "step": 193, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:16:22.987267", "step": 193, "epoch": 1 }, { "type": "loss", "content": 0.02178066037595272, "timestamp": "2025-09-10 02:16:22.991642", "step": 194, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:16:23.024415", "step": 194, "epoch": 1 }, { "type": "loss", "content": 0.023802533745765686, "timestamp": "2025-09-10 02:16:23.030527", "step": 195, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:16:23.062627", "step": 195, "epoch": 1 }, { "type": "loss", "content": 0.02088129334151745, "timestamp": "2025-09-10 02:16:23.091433", "step": 196, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:16:23.125721", "step": 196, "epoch": 1 }, { "type": "loss", "content": 0.03935558721423149, "timestamp": "2025-09-10 02:16:23.133504", "step": 197, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:16:23.169617", "step": 197, "epoch": 1 }, { "type": "loss", "content": 0.02025543339550495, "timestamp": "2025-09-10 02:16:23.176598", "step": 198, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:16:23.208914", "step": 198, "epoch": 1 }, { "type": "loss", "content": 0.012568363919854164, "timestamp": "2025-09-10 02:16:23.216126", "step": 199, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:16:23.246371", "step": 199, "epoch": 1 }, { "type": "loss", "content": 0.013729465194046497, "timestamp": "2025-09-10 02:16:23.278223", "step": 200, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:16:23.309443", "step": 200, "epoch": 1 }, { "type": "loss", "content": 0.01759318821132183, "timestamp": "2025-09-10 02:16:23.313924", "step": 201, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:16:23.344145", "step": 201, "epoch": 1 }, { "type": "loss", "content": 0.007888035848736763, "timestamp": "2025-09-10 02:16:23.351604", "step": 202, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:16:23.382940", "step": 202, "epoch": 1 }, { "type": "loss", "content": 0.00965458806604147, "timestamp": "2025-09-10 02:16:23.390314", "step": 203, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:16:23.423895", "step": 203, "epoch": 1 }, { "type": "loss", "content": 0.01958434283733368, "timestamp": "2025-09-10 02:16:23.452560", "step": 204, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:16:23.481703", "step": 204, "epoch": 1 }, { "type": "loss", "content": 0.007743260823190212, "timestamp": "2025-09-10 02:16:23.487143", "step": 205, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:16:23.517951", "step": 205, "epoch": 1 }, { "type": "loss", "content": 0.004702796693891287, "timestamp": "2025-09-10 02:16:23.525371", "step": 206, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:16:23.555186", "step": 206, "epoch": 1 }, { "type": "loss", "content": 0.01660262979567051, "timestamp": "2025-09-10 02:16:23.562112", "step": 207, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:16:23.593403", "step": 207, "epoch": 1 }, { "type": "loss", "content": 0.01599551923573017, "timestamp": "2025-09-10 02:16:23.626330", "step": 208, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:16:23.658169", "step": 208, "epoch": 1 }, { "type": "loss", "content": 0.013740134425461292, "timestamp": "2025-09-10 02:16:23.662707", "step": 209, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:16:23.693156", "step": 209, "epoch": 1 }, { "type": "loss", "content": 0.017071815207600594, "timestamp": "2025-09-10 02:16:23.696982", "step": 210, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:16:23.731153", "step": 210, "epoch": 1 }, { "type": "loss", "content": 0.021906418725848198, "timestamp": "2025-09-10 02:16:23.738722", "step": 211, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:16:23.775191", "step": 211, "epoch": 1 }, { "type": "loss", "content": 0.008046785369515419, "timestamp": "2025-09-10 02:16:23.800487", "step": 212, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:16:23.830266", "step": 212, "epoch": 1 }, { "type": "loss", "content": 0.006279070395976305, "timestamp": "2025-09-10 02:16:23.832367", "step": 213, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:16:23.862918", "step": 213, "epoch": 1 }, { "type": "loss", "content": 0.025564759969711304, "timestamp": "2025-09-10 02:16:23.866636", "step": 214, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:16:23.898046", "step": 214, "epoch": 1 }, { "type": "loss", "content": 0.01694483682513237, "timestamp": "2025-09-10 02:16:23.905447", "step": 215, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:16:23.938422", "step": 215, "epoch": 1 }, { "type": "loss", "content": 0.028261274099349976, "timestamp": "2025-09-10 02:16:23.966060", "step": 216, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:16:23.995793", "step": 216, "epoch": 1 }, { "type": "loss", "content": 0.008908641524612904, "timestamp": "2025-09-10 02:16:23.997994", "step": 217, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:16:24.033911", "step": 217, "epoch": 1 }, { "type": "loss", "content": 0.049135930836200714, "timestamp": "2025-09-10 02:16:24.047306", "step": 218, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:16:24.077407", "step": 218, "epoch": 1 }, { "type": "loss", "content": 0.009086393751204014, "timestamp": "2025-09-10 02:16:24.085796", "step": 219, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:16:24.121929", "step": 219, "epoch": 1 }, { "type": "loss", "content": 0.01568550243973732, "timestamp": "2025-09-10 02:16:24.155378", "step": 220, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:16:24.187632", "step": 220, "epoch": 1 }, { "type": "loss", "content": 0.011114334687590599, "timestamp": "2025-09-10 02:16:24.200647", "step": 221, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:16:24.237581", "step": 221, "epoch": 1 }, { "type": "loss", "content": 0.02597637288272381, "timestamp": "2025-09-10 02:16:24.244807", "step": 222, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:16:24.278536", "step": 222, "epoch": 1 }, { "type": "loss", "content": 0.011223288252949715, "timestamp": "2025-09-10 02:16:24.285951", "step": 223, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:16:24.317555", "step": 223, "epoch": 1 }, { "type": "loss", "content": 0.010914224199950695, "timestamp": "2025-09-10 02:16:24.345436", "step": 224, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 528 ], "flops": 15662185694400 }, "timestamp": "2025-09-10 02:16:24.397785", "step": 224, "epoch": 1 }, { "type": "loss", "content": 0.009774766862392426, "timestamp": "2025-09-10 02:16:24.413366", "step": 225, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:16:24.445516", "step": 225, "epoch": 1 }, { "type": "loss", "content": 0.03243091329932213, "timestamp": "2025-09-10 02:16:24.456124", "step": 226, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:16:24.487958", "step": 226, "epoch": 1 }, { "type": "loss", "content": 0.005026062484830618, "timestamp": "2025-09-10 02:16:24.498899", "step": 227, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:16:24.531723", "step": 227, "epoch": 1 }, { "type": "loss", "content": 0.017378708347678185, "timestamp": "2025-09-10 02:16:24.557445", "step": 228, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:16:24.588636", "step": 228, "epoch": 1 }, { "type": "loss", "content": 0.030252641066908836, "timestamp": "2025-09-10 02:16:24.601298", "step": 229, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:16:24.633811", "step": 229, "epoch": 1 }, { "type": "loss", "content": 0.02279387228190899, "timestamp": "2025-09-10 02:16:24.646266", "step": 230, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-10 02:16:24.683908", "step": 230, "epoch": 1 }, { "type": "loss", "content": 0.008336501196026802, "timestamp": "2025-09-10 02:16:24.699519", "step": 231, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:16:24.730452", "step": 231, "epoch": 1 }, { "type": "loss", "content": 0.016707872971892357, "timestamp": "2025-09-10 02:16:24.761385", "step": 232, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:16:24.794054", "step": 232, "epoch": 1 }, { "type": "loss", "content": 0.023161133751273155, "timestamp": "2025-09-10 02:16:24.803510", "step": 233, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:16:24.839446", "step": 233, "epoch": 1 }, { "type": "loss", "content": 0.011448432691395283, "timestamp": "2025-09-10 02:16:24.851561", "step": 234, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:16:24.887091", "step": 234, "epoch": 1 }, { "type": "loss", "content": 0.034271273761987686, "timestamp": "2025-09-10 02:16:24.894082", "step": 235, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:16:24.924893", "step": 235, "epoch": 1 }, { "type": "loss", "content": 0.014886337332427502, "timestamp": "2025-09-10 02:16:24.952841", "step": 236, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:16:24.991322", "step": 236, "epoch": 1 }, { "type": "loss", "content": 0.02193574421107769, "timestamp": "2025-09-10 02:16:25.010432", "step": 237, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:16:25.054375", "step": 237, "epoch": 1 }, { "type": "loss", "content": 0.009645821526646614, "timestamp": "2025-09-10 02:16:25.061131", "step": 238, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:16:25.100555", "step": 238, "epoch": 1 }, { "type": "loss", "content": 0.009037821553647518, "timestamp": "2025-09-10 02:16:25.107882", "step": 239, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-10 02:16:25.148957", "step": 239, "epoch": 1 }, { "type": "loss", "content": 0.014126168563961983, "timestamp": "2025-09-10 02:16:25.185418", "step": 240, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:16:25.218887", "step": 240, "epoch": 1 }, { "type": "loss", "content": 0.005452022887766361, "timestamp": "2025-09-10 02:16:25.224457", "step": 241, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:16:25.254336", "step": 241, "epoch": 1 }, { "type": "loss", "content": 0.03313310071825981, "timestamp": "2025-09-10 02:16:25.261497", "step": 242, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:16:25.291853", "step": 242, "epoch": 1 }, { "type": "loss", "content": 0.03182428702712059, "timestamp": "2025-09-10 02:16:25.302738", "step": 243, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 144 ], "flops": 4271696270016 }, "timestamp": "2025-09-10 02:16:25.338675", "step": 243, "epoch": 1 }, { "type": "loss", "content": 0.029811818152666092, "timestamp": "2025-09-10 02:16:25.366539", "step": 244, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:16:25.397901", "step": 244, "epoch": 1 }, { "type": "loss", "content": 0.03838468715548515, "timestamp": "2025-09-10 02:16:25.402821", "step": 245, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:16:25.438631", "step": 245, "epoch": 1 }, { "type": "loss", "content": 0.02467919886112213, "timestamp": "2025-09-10 02:16:25.445474", "step": 246, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:16:25.479124", "step": 246, "epoch": 1 }, { "type": "loss", "content": 0.00823969580233097, "timestamp": "2025-09-10 02:16:25.486640", "step": 247, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-10 02:16:25.528750", "step": 247, "epoch": 1 }, { "type": "loss", "content": 0.011138495989143848, "timestamp": "2025-09-10 02:16:25.565512", "step": 248, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:16:25.596292", "step": 248, "epoch": 1 }, { "type": "loss", "content": 0.03702753037214279, "timestamp": "2025-09-10 02:16:25.601440", "step": 249, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:16:25.634575", "step": 249, "epoch": 1 }, { "type": "loss", "content": 0.007849356159567833, "timestamp": "2025-09-10 02:16:25.647926", "step": 250, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:16:25.678453", "step": 250, "epoch": 1 }, { "type": "loss", "content": 0.00440265703946352, "timestamp": "2025-09-10 02:16:25.685668", "step": 251, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:16:25.715578", "step": 251, "epoch": 1 }, { "type": "loss", "content": 0.0027811271138489246, "timestamp": "2025-09-10 02:16:25.749017", "step": 252, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:16:25.779626", "step": 252, "epoch": 1 }, { "type": "loss", "content": 0.002288882387802005, "timestamp": "2025-09-10 02:16:25.781615", "step": 253, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:16:25.811818", "step": 253, "epoch": 1 }, { "type": "loss", "content": 0.02566692791879177, "timestamp": "2025-09-10 02:16:25.819734", "step": 254, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:16:25.851082", "step": 254, "epoch": 1 }, { "type": "loss", "content": 0.016085701063275337, "timestamp": "2025-09-10 02:16:25.857937", "step": 255, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:16:25.888181", "step": 255, "epoch": 1 }, { "type": "loss", "content": 0.007969672791659832, "timestamp": "2025-09-10 02:16:25.916936", "step": 256, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:16:25.948366", "step": 256, "epoch": 1 }, { "type": "loss", "content": 0.02336304821074009, "timestamp": "2025-09-10 02:16:25.953983", "step": 257, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:16:25.986175", "step": 257, "epoch": 1 }, { "type": "loss", "content": 0.010644437745213509, "timestamp": "2025-09-10 02:16:25.993642", "step": 258, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:16:26.026399", "step": 258, "epoch": 1 }, { "type": "loss", "content": 0.009769896045327187, "timestamp": "2025-09-10 02:16:26.030862", "step": 259, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:16:26.063269", "step": 259, "epoch": 1 }, { "type": "loss", "content": 0.015422756783664227, "timestamp": "2025-09-10 02:16:26.091962", "step": 260, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:16:26.123418", "step": 260, "epoch": 1 }, { "type": "loss", "content": 0.031315069645643234, "timestamp": "2025-09-10 02:16:26.136180", "step": 261, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:16:26.166267", "step": 261, "epoch": 1 }, { "type": "loss", "content": 0.033414822071790695, "timestamp": "2025-09-10 02:16:26.173215", "step": 262, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:16:26.206363", "step": 262, "epoch": 1 }, { "type": "loss", "content": 0.015460536815226078, "timestamp": "2025-09-10 02:16:26.208832", "step": 263, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:16:26.240930", "step": 263, "epoch": 1 }, { "type": "loss", "content": 0.030592020601034164, "timestamp": "2025-09-10 02:16:26.272768", "step": 264, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:16:26.302446", "step": 264, "epoch": 1 }, { "type": "loss", "content": 0.01532017719000578, "timestamp": "2025-09-10 02:16:26.307982", "step": 265, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:16:26.341058", "step": 265, "epoch": 1 }, { "type": "loss", "content": 0.033243995159864426, "timestamp": "2025-09-10 02:16:26.354418", "step": 266, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:16:26.387090", "step": 266, "epoch": 1 }, { "type": "loss", "content": 0.017378225922584534, "timestamp": "2025-09-10 02:16:26.394754", "step": 267, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 144 ], "flops": 4271696270016 }, "timestamp": "2025-09-10 02:16:26.424486", "step": 267, "epoch": 1 }, { "type": "loss", "content": 0.02742266096174717, "timestamp": "2025-09-10 02:16:26.448727", "step": 268, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:16:26.482635", "step": 268, "epoch": 1 }, { "type": "loss", "content": 0.04506625607609749, "timestamp": "2025-09-10 02:16:26.487290", "step": 269, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:16:26.518809", "step": 269, "epoch": 1 }, { "type": "loss", "content": 0.013364973478019238, "timestamp": "2025-09-10 02:16:26.531381", "step": 270, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:16:26.562970", "step": 270, "epoch": 1 }, { "type": "loss", "content": 0.01263825036585331, "timestamp": "2025-09-10 02:16:26.569994", "step": 271, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:16:26.602400", "step": 271, "epoch": 1 }, { "type": "loss", "content": 0.03316551819443703, "timestamp": "2025-09-10 02:16:26.630937", "step": 272, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:16:26.675334", "step": 272, "epoch": 1 }, { "type": "loss", "content": 0.019756343215703964, "timestamp": "2025-09-10 02:16:26.680035", "step": 273, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:16:26.715960", "step": 273, "epoch": 1 }, { "type": "loss", "content": 0.0236830972135067, "timestamp": "2025-09-10 02:16:26.723355", "step": 274, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:16:26.761266", "step": 274, "epoch": 1 }, { "type": "loss", "content": 0.00928380899131298, "timestamp": "2025-09-10 02:16:26.768864", "step": 275, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:16:26.810695", "step": 275, "epoch": 1 }, { "type": "loss", "content": 0.014733054675161839, "timestamp": "2025-09-10 02:16:26.835676", "step": 276, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:16:26.871465", "step": 276, "epoch": 1 }, { "type": "loss", "content": 0.03336886316537857, "timestamp": "2025-09-10 02:16:26.884467", "step": 277, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:16:26.916086", "step": 277, "epoch": 1 }, { "type": "loss", "content": 0.008176090195775032, "timestamp": "2025-09-10 02:16:26.922890", "step": 278, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:16:26.953543", "step": 278, "epoch": 1 }, { "type": "loss", "content": 0.010884806513786316, "timestamp": "2025-09-10 02:16:26.960988", "step": 279, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:16:26.991008", "step": 279, "epoch": 1 }, { "type": "loss", "content": 0.010812760330736637, "timestamp": "2025-09-10 02:16:27.019416", "step": 280, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:16:27.052033", "step": 280, "epoch": 1 }, { "type": "loss", "content": 0.018427478149533272, "timestamp": "2025-09-10 02:16:27.061756", "step": 281, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:16:27.091729", "step": 281, "epoch": 1 }, { "type": "loss", "content": 0.005787822883576155, "timestamp": "2025-09-10 02:16:27.094494", "step": 282, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:16:27.126564", "step": 282, "epoch": 1 }, { "type": "loss", "content": 0.026878537610173225, "timestamp": "2025-09-10 02:16:27.133510", "step": 283, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:16:27.163712", "step": 283, "epoch": 1 }, { "type": "loss", "content": 0.014809337444603443, "timestamp": "2025-09-10 02:16:27.191924", "step": 284, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:16:27.231470", "step": 284, "epoch": 1 }, { "type": "loss", "content": 0.0403473936021328, "timestamp": "2025-09-10 02:16:27.233854", "step": 285, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:16:27.264630", "step": 285, "epoch": 1 }, { "type": "loss", "content": 0.009624199941754341, "timestamp": "2025-09-10 02:16:27.271396", "step": 286, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:16:27.307634", "step": 286, "epoch": 1 }, { "type": "loss", "content": 0.00741335516795516, "timestamp": "2025-09-10 02:16:27.314627", "step": 287, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:16:27.346502", "step": 287, "epoch": 1 }, { "type": "loss", "content": 0.018726302310824394, "timestamp": "2025-09-10 02:16:27.374123", "step": 288, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:16:27.404826", "step": 288, "epoch": 1 }, { "type": "loss", "content": 0.043964944779872894, "timestamp": "2025-09-10 02:16:27.412272", "step": 289, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:16:27.443455", "step": 289, "epoch": 1 }, { "type": "loss", "content": 0.0012115959543734789, "timestamp": "2025-09-10 02:16:27.450533", "step": 290, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:16:27.482885", "step": 290, "epoch": 1 }, { "type": "loss", "content": 0.016456475481390953, "timestamp": "2025-09-10 02:16:27.495010", "step": 291, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:16:27.525943", "step": 291, "epoch": 1 }, { "type": "loss", "content": 0.0014793974114581943, "timestamp": "2025-09-10 02:16:27.557732", "step": 292, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:16:27.588473", "step": 292, "epoch": 1 }, { "type": "loss", "content": 0.01667448878288269, "timestamp": "2025-09-10 02:16:27.596371", "step": 293, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:16:27.627182", "step": 293, "epoch": 1 }, { "type": "loss", "content": 0.013031461276113987, "timestamp": "2025-09-10 02:16:27.633929", "step": 294, "epoch": 1 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 736 ], "batch_size": 8, "flops": 14554433988352 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 480 ], "batch_size": 8, "flops": 9492022189824 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 544 ], "batch_size": 8, "flops": 10757625139456 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 624 ], "batch_size": 8, "flops": 12339628826496 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 576 ], "batch_size": 8, "flops": 11390426614272 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 432 ], "batch_size": 8, "flops": 8542819977600 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 432 ], "batch_size": 8, "flops": 8542819977600 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 128 ], "batch_size": 8, "flops": 2531205966848 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 544 ], "batch_size": 8, "flops": 10757625139456 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 656 ], "batch_size": 8, "flops": 12972430301312 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 128 ], "batch_size": 8, "flops": 2531205966848 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 464 ], "batch_size": 8, "flops": 9175621452416 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 560 ], "batch_size": 8, "flops": 11074025876864 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 576 ], "batch_size": 8, "flops": 11390426614272 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 1168 ], "batch_size": 8, "flops": 23097253898368 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 640 ], "batch_size": 8, "flops": 12656029563904 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 6, 208 ], "batch_size": 8, "flops": 4113209653888 } ], "timestamp": "2025-09-10 02:16:38.184699", "step": 294, "epoch": 1 }, { "type": "pplx", "content": 15332585.816633547, "timestamp": "2025-09-10 02:16:38.187323", "step": 294, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:16:38.219791", "step": 294, "epoch": 1 }, { "type": "loss", "content": 0.023444533348083496, "timestamp": "2025-09-10 02:16:38.233473", "step": 295, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-10 02:16:38.268307", "step": 295, "epoch": 1 }, { "type": "loss", "content": 0.008510954678058624, "timestamp": "2025-09-10 02:16:38.303183", "step": 296, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:16:38.335146", "step": 296, "epoch": 1 }, { "type": "loss", "content": 0.015145028941333294, "timestamp": "2025-09-10 02:16:38.339032", "step": 297, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:16:38.370675", "step": 297, "epoch": 1 }, { "type": "loss", "content": 0.0069735231809318066, "timestamp": "2025-09-10 02:16:38.374348", "step": 298, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-10 02:16:38.412173", "step": 298, "epoch": 1 }, { "type": "loss", "content": 0.013920980505645275, "timestamp": "2025-09-10 02:16:38.427806", "step": 299, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:16:38.460108", "step": 299, "epoch": 1 }, { "type": "loss", "content": 0.04089200869202614, "timestamp": "2025-09-10 02:16:38.485588", "step": 300, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:16:38.520153", "step": 300, "epoch": 1 }, { "type": "loss", "content": 0.013281070627272129, "timestamp": "2025-09-10 02:16:38.523981", "step": 301, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:16:38.558005", "step": 301, "epoch": 1 }, { "type": "loss", "content": 0.03492172807455063, "timestamp": "2025-09-10 02:16:38.567580", "step": 302, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:16:38.600149", "step": 302, "epoch": 1 }, { "type": "loss", "content": 0.03769215941429138, "timestamp": "2025-09-10 02:16:38.609626", "step": 303, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:16:38.641634", "step": 303, "epoch": 1 }, { "type": "loss", "content": 0.010984980501234531, "timestamp": "2025-09-10 02:16:38.669717", "step": 304, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:16:38.699933", "step": 304, "epoch": 1 }, { "type": "loss", "content": 0.016611166298389435, "timestamp": "2025-09-10 02:16:38.707452", "step": 305, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:16:38.739406", "step": 305, "epoch": 1 }, { "type": "loss", "content": 0.013240032829344273, "timestamp": "2025-09-10 02:16:38.750082", "step": 306, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:16:38.780171", "step": 306, "epoch": 1 }, { "type": "loss", "content": 0.01312293391674757, "timestamp": "2025-09-10 02:16:38.792673", "step": 307, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:16:38.823157", "step": 307, "epoch": 1 }, { "type": "loss", "content": 0.01570984721183777, "timestamp": "2025-09-10 02:16:38.851813", "step": 308, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:16:38.882826", "step": 308, "epoch": 1 }, { "type": "loss", "content": 0.011351371183991432, "timestamp": "2025-09-10 02:16:38.887319", "step": 309, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:16:38.916806", "step": 309, "epoch": 1 }, { "type": "loss", "content": 0.006130642257630825, "timestamp": "2025-09-10 02:16:38.920935", "step": 310, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:16:38.952461", "step": 310, "epoch": 1 }, { "type": "loss", "content": 0.02042931318283081, "timestamp": "2025-09-10 02:16:38.959345", "step": 311, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-10 02:16:38.994181", "step": 311, "epoch": 1 }, { "type": "loss", "content": 0.008902345784008503, "timestamp": "2025-09-10 02:16:39.029134", "step": 312, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:16:39.062647", "step": 312, "epoch": 1 }, { "type": "loss", "content": 0.02012813650071621, "timestamp": "2025-09-10 02:16:39.075585", "step": 313, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:16:39.106912", "step": 313, "epoch": 1 }, { "type": "loss", "content": 0.022141067311167717, "timestamp": "2025-09-10 02:16:39.118577", "step": 314, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:16:39.151698", "step": 314, "epoch": 1 }, { "type": "loss", "content": 0.03623204678297043, "timestamp": "2025-09-10 02:16:39.154698", "step": 315, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:16:39.185998", "step": 315, "epoch": 1 }, { "type": "loss", "content": 0.0135353934019804, "timestamp": "2025-09-10 02:16:39.213392", "step": 316, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:16:39.243549", "step": 316, "epoch": 1 }, { "type": "loss", "content": 0.028507256880402565, "timestamp": "2025-09-10 02:16:39.248758", "step": 317, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:16:39.279221", "step": 317, "epoch": 1 }, { "type": "loss", "content": 0.029799891635775566, "timestamp": "2025-09-10 02:16:39.290132", "step": 318, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:16:39.319964", "step": 318, "epoch": 1 }, { "type": "loss", "content": 0.02186533249914646, "timestamp": "2025-09-10 02:16:39.326712", "step": 319, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:16:39.356706", "step": 319, "epoch": 1 }, { "type": "loss", "content": 0.014612867496907711, "timestamp": "2025-09-10 02:16:39.387845", "step": 320, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-10 02:16:39.425403", "step": 320, "epoch": 1 }, { "type": "loss", "content": 0.012654599733650684, "timestamp": "2025-09-10 02:16:39.440561", "step": 321, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:16:39.474992", "step": 321, "epoch": 1 }, { "type": "loss", "content": 0.021614069119095802, "timestamp": "2025-09-10 02:16:39.482465", "step": 322, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:16:39.513753", "step": 322, "epoch": 1 }, { "type": "loss", "content": 0.012967349961400032, "timestamp": "2025-09-10 02:16:39.526078", "step": 323, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:16:39.560156", "step": 323, "epoch": 1 }, { "type": "loss", "content": 0.024872979149222374, "timestamp": "2025-09-10 02:16:39.588086", "step": 324, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:16:39.619750", "step": 324, "epoch": 1 }, { "type": "loss", "content": 0.026342902332544327, "timestamp": "2025-09-10 02:16:39.624580", "step": 325, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:16:39.654950", "step": 325, "epoch": 1 }, { "type": "loss", "content": 0.006380206905305386, "timestamp": "2025-09-10 02:16:39.659454", "step": 326, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:16:39.689823", "step": 326, "epoch": 1 }, { "type": "loss", "content": 0.014897564426064491, "timestamp": "2025-09-10 02:16:39.694202", "step": 327, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:16:39.724802", "step": 327, "epoch": 1 }, { "type": "loss", "content": 0.02040562406182289, "timestamp": "2025-09-10 02:16:39.755826", "step": 328, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:16:39.787423", "step": 328, "epoch": 1 }, { "type": "loss", "content": 0.015893712639808655, "timestamp": "2025-09-10 02:16:39.789419", "step": 329, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:16:39.819626", "step": 329, "epoch": 1 }, { "type": "loss", "content": 0.03249195218086243, "timestamp": "2025-09-10 02:16:39.826538", "step": 330, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:16:39.859763", "step": 330, "epoch": 1 }, { "type": "loss", "content": 0.025817295536398888, "timestamp": "2025-09-10 02:16:39.866827", "step": 331, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:16:39.897181", "step": 331, "epoch": 1 }, { "type": "loss", "content": 0.030507784336805344, "timestamp": "2025-09-10 02:16:39.928222", "step": 332, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:16:39.958589", "step": 332, "epoch": 1 }, { "type": "loss", "content": 0.04506632685661316, "timestamp": "2025-09-10 02:16:39.964162", "step": 333, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:16:39.994622", "step": 333, "epoch": 1 }, { "type": "loss", "content": 0.009867721237242222, "timestamp": "2025-09-10 02:16:40.001420", "step": 334, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:16:40.031806", "step": 334, "epoch": 1 }, { "type": "loss", "content": 0.03285963833332062, "timestamp": "2025-09-10 02:16:40.038877", "step": 335, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:16:40.068180", "step": 335, "epoch": 1 }, { "type": "loss", "content": 0.004922616295516491, "timestamp": "2025-09-10 02:16:40.096072", "step": 336, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:16:40.125555", "step": 336, "epoch": 1 }, { "type": "loss", "content": 0.011179156601428986, "timestamp": "2025-09-10 02:16:40.127649", "step": 337, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:16:40.157666", "step": 337, "epoch": 1 }, { "type": "loss", "content": 0.011313307099044323, "timestamp": "2025-09-10 02:16:40.161788", "step": 338, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:16:40.191735", "step": 338, "epoch": 1 }, { "type": "loss", "content": 0.008836266584694386, "timestamp": "2025-09-10 02:16:40.196410", "step": 339, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:16:40.226239", "step": 339, "epoch": 1 }, { "type": "loss", "content": 0.021027730777859688, "timestamp": "2025-09-10 02:16:40.251163", "step": 340, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:16:40.282459", "step": 340, "epoch": 1 }, { "type": "loss", "content": 0.020776817575097084, "timestamp": "2025-09-10 02:16:40.289283", "step": 341, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:16:40.322361", "step": 341, "epoch": 1 }, { "type": "loss", "content": 0.013467268086969852, "timestamp": "2025-09-10 02:16:40.325962", "step": 342, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:16:40.358346", "step": 342, "epoch": 1 }, { "type": "loss", "content": 0.0017702631885185838, "timestamp": "2025-09-10 02:16:40.365035", "step": 343, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:16:40.395803", "step": 343, "epoch": 1 }, { "type": "loss", "content": 0.01885797269642353, "timestamp": "2025-09-10 02:16:40.424139", "step": 344, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:16:40.455425", "step": 344, "epoch": 1 }, { "type": "loss", "content": 0.011524977162480354, "timestamp": "2025-09-10 02:16:40.465019", "step": 345, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:16:40.496082", "step": 345, "epoch": 1 }, { "type": "loss", "content": 0.020894749090075493, "timestamp": "2025-09-10 02:16:40.500138", "step": 346, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:16:40.533156", "step": 346, "epoch": 1 }, { "type": "loss", "content": 0.022029070183634758, "timestamp": "2025-09-10 02:16:40.543987", "step": 347, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:16:40.574403", "step": 347, "epoch": 1 }, { "type": "loss", "content": 0.018113840371370316, "timestamp": "2025-09-10 02:16:40.602678", "step": 348, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:16:40.632904", "step": 348, "epoch": 1 }, { "type": "loss", "content": 0.030301451683044434, "timestamp": "2025-09-10 02:16:40.637626", "step": 349, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:16:40.667865", "step": 349, "epoch": 1 }, { "type": "loss", "content": 0.008944302797317505, "timestamp": "2025-09-10 02:16:40.674887", "step": 350, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:16:40.706075", "step": 350, "epoch": 1 }, { "type": "loss", "content": 0.015170658007264137, "timestamp": "2025-09-10 02:16:40.716939", "step": 351, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:16:40.747818", "step": 351, "epoch": 1 }, { "type": "loss", "content": 0.004507328849285841, "timestamp": "2025-09-10 02:16:40.775419", "step": 352, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:16:40.805563", "step": 352, "epoch": 1 }, { "type": "loss", "content": 0.03259176388382912, "timestamp": "2025-09-10 02:16:40.810294", "step": 353, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:16:40.840532", "step": 353, "epoch": 1 }, { "type": "loss", "content": 0.019336406141519547, "timestamp": "2025-09-10 02:16:40.848235", "step": 354, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:16:40.879018", "step": 354, "epoch": 1 }, { "type": "loss", "content": 0.009617815725505352, "timestamp": "2025-09-10 02:16:40.891221", "step": 355, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:16:40.924595", "step": 355, "epoch": 1 }, { "type": "loss", "content": 0.01103215478360653, "timestamp": "2025-09-10 02:16:40.958887", "step": 356, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:16:40.992068", "step": 356, "epoch": 1 }, { "type": "loss", "content": 0.004833351355046034, "timestamp": "2025-09-10 02:16:41.000528", "step": 357, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 656 ], "flops": 19459015502528 }, "timestamp": "2025-09-10 02:16:41.061147", "step": 357, "epoch": 1 }, { "type": "loss", "content": 0.029006347060203552, "timestamp": "2025-09-10 02:16:41.084566", "step": 358, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:16:41.115146", "step": 358, "epoch": 1 }, { "type": "loss", "content": 0.0070088389329612255, "timestamp": "2025-09-10 02:16:41.122685", "step": 359, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:16:41.153083", "step": 359, "epoch": 1 }, { "type": "loss", "content": 0.005866586230695248, "timestamp": "2025-09-10 02:16:41.180827", "step": 360, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:16:41.211081", "step": 360, "epoch": 1 }, { "type": "loss", "content": 0.005960374139249325, "timestamp": "2025-09-10 02:16:41.216464", "step": 361, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:16:41.247340", "step": 361, "epoch": 1 }, { "type": "loss", "content": 0.020136630162596703, "timestamp": "2025-09-10 02:16:41.253990", "step": 362, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:16:41.285009", "step": 362, "epoch": 1 }, { "type": "loss", "content": 0.019610974937677383, "timestamp": "2025-09-10 02:16:41.291838", "step": 363, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:16:41.322911", "step": 363, "epoch": 1 }, { "type": "loss", "content": 0.008913476951420307, "timestamp": "2025-09-10 02:16:41.356086", "step": 364, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:16:41.387239", "step": 364, "epoch": 1 }, { "type": "loss", "content": 0.011225526221096516, "timestamp": "2025-09-10 02:16:41.392356", "step": 365, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:16:41.423802", "step": 365, "epoch": 1 }, { "type": "loss", "content": 0.006913323421031237, "timestamp": "2025-09-10 02:16:41.431320", "step": 366, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:16:41.461790", "step": 366, "epoch": 1 }, { "type": "loss", "content": 0.009624729864299297, "timestamp": "2025-09-10 02:16:41.469080", "step": 367, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:16:41.499416", "step": 367, "epoch": 1 }, { "type": "loss", "content": 0.025887373834848404, "timestamp": "2025-09-10 02:16:41.527240", "step": 368, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:16:41.558343", "step": 368, "epoch": 1 }, { "type": "loss", "content": 0.008098089136183262, "timestamp": "2025-09-10 02:16:41.562878", "step": 369, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:16:41.593446", "step": 369, "epoch": 1 }, { "type": "loss", "content": 0.006331682205200195, "timestamp": "2025-09-10 02:16:41.600508", "step": 370, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:16:41.630146", "step": 370, "epoch": 1 }, { "type": "loss", "content": 0.006118200719356537, "timestamp": "2025-09-10 02:16:41.634316", "step": 371, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:16:41.665242", "step": 371, "epoch": 1 }, { "type": "loss", "content": 0.008842705748975277, "timestamp": "2025-09-10 02:16:41.696961", "step": 372, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:16:41.727842", "step": 372, "epoch": 1 }, { "type": "loss", "content": 0.0035271942615509033, "timestamp": "2025-09-10 02:16:41.732390", "step": 373, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:16:41.762973", "step": 373, "epoch": 1 }, { "type": "loss", "content": 0.03405757620930672, "timestamp": "2025-09-10 02:16:41.770190", "step": 374, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:16:41.800748", "step": 374, "epoch": 1 }, { "type": "loss", "content": 0.011618994176387787, "timestamp": "2025-09-10 02:16:41.807526", "step": 375, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:16:41.838416", "step": 375, "epoch": 1 }, { "type": "loss", "content": 0.02129237912595272, "timestamp": "2025-09-10 02:16:41.871884", "step": 376, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 512 ], "flops": 15187581968384 }, "timestamp": "2025-09-10 02:16:41.911666", "step": 376, "epoch": 1 }, { "type": "loss", "content": 0.010881869122385979, "timestamp": "2025-09-10 02:16:41.929030", "step": 377, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:16:41.962237", "step": 377, "epoch": 1 }, { "type": "loss", "content": 0.043674368411302567, "timestamp": "2025-09-10 02:16:41.969644", "step": 378, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:16:42.001605", "step": 378, "epoch": 1 }, { "type": "loss", "content": 0.026635179296135902, "timestamp": "2025-09-10 02:16:42.007745", "step": 379, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:16:42.038844", "step": 379, "epoch": 1 }, { "type": "loss", "content": 0.020435309037566185, "timestamp": "2025-09-10 02:16:42.066678", "step": 380, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:16:42.096798", "step": 380, "epoch": 1 }, { "type": "loss", "content": 0.00813285168260336, "timestamp": "2025-09-10 02:16:42.101285", "step": 381, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:16:42.131500", "step": 381, "epoch": 1 }, { "type": "loss", "content": 0.002828313270583749, "timestamp": "2025-09-10 02:16:42.138351", "step": 382, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:16:42.168544", "step": 382, "epoch": 1 }, { "type": "loss", "content": 0.009776918217539787, "timestamp": "2025-09-10 02:16:42.175390", "step": 383, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:16:42.205969", "step": 383, "epoch": 1 }, { "type": "loss", "content": 0.005225719418376684, "timestamp": "2025-09-10 02:16:42.237096", "step": 384, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:16:42.268769", "step": 384, "epoch": 1 }, { "type": "loss", "content": 0.01830691285431385, "timestamp": "2025-09-10 02:16:42.281411", "step": 385, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:16:42.311923", "step": 385, "epoch": 1 }, { "type": "loss", "content": 0.03338460996747017, "timestamp": "2025-09-10 02:16:42.318762", "step": 386, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:16:42.352378", "step": 386, "epoch": 1 }, { "type": "loss", "content": 0.009636408649384975, "timestamp": "2025-09-10 02:16:42.356616", "step": 387, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:16:42.392495", "step": 387, "epoch": 1 }, { "type": "loss", "content": 0.011447208002209663, "timestamp": "2025-09-10 02:16:42.427109", "step": 388, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:16:42.458611", "step": 388, "epoch": 1 }, { "type": "loss", "content": 0.008088217116892338, "timestamp": "2025-09-10 02:16:42.463326", "step": 389, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:16:42.496371", "step": 389, "epoch": 1 }, { "type": "loss", "content": 0.015474558807909489, "timestamp": "2025-09-10 02:16:42.500585", "step": 390, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:16:42.531277", "step": 390, "epoch": 1 }, { "type": "loss", "content": 0.02543746307492256, "timestamp": "2025-09-10 02:16:42.542317", "step": 391, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:16:42.573162", "step": 391, "epoch": 1 }, { "type": "loss", "content": 0.025561505928635597, "timestamp": "2025-09-10 02:16:42.601815", "step": 392, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:16:42.632716", "step": 392, "epoch": 1 }, { "type": "loss", "content": 0.03465661779046059, "timestamp": "2025-09-10 02:16:42.640158", "step": 393, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:16:42.671412", "step": 393, "epoch": 1 }, { "type": "loss", "content": 0.01359565556049347, "timestamp": "2025-09-10 02:16:42.680964", "step": 394, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:16:42.711581", "step": 394, "epoch": 1 }, { "type": "loss", "content": 0.007907412014901638, "timestamp": "2025-09-10 02:16:42.718717", "step": 395, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:16:42.750176", "step": 395, "epoch": 1 }, { "type": "loss", "content": 0.0071455794386565685, "timestamp": "2025-09-10 02:16:42.783294", "step": 396, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:16:42.816573", "step": 396, "epoch": 1 }, { "type": "loss", "content": 0.0031619654037058353, "timestamp": "2025-09-10 02:16:42.820323", "step": 397, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:16:42.851829", "step": 397, "epoch": 1 }, { "type": "loss", "content": 0.004171676468104124, "timestamp": "2025-09-10 02:16:42.858972", "step": 398, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:16:42.890758", "step": 398, "epoch": 1 }, { "type": "loss", "content": 0.029826102778315544, "timestamp": "2025-09-10 02:16:42.897555", "step": 399, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:16:42.928308", "step": 399, "epoch": 1 }, { "type": "loss", "content": 0.01927870139479637, "timestamp": "2025-09-10 02:16:42.953538", "step": 400, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:16:42.985120", "step": 400, "epoch": 1 }, { "type": "loss", "content": 0.021446945145726204, "timestamp": "2025-09-10 02:16:42.989056", "step": 401, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:16:43.022395", "step": 401, "epoch": 1 }, { "type": "loss", "content": 0.007334953639656305, "timestamp": "2025-09-10 02:16:43.031856", "step": 402, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:16:43.064842", "step": 402, "epoch": 1 }, { "type": "loss", "content": 0.01580173708498478, "timestamp": "2025-09-10 02:16:43.076947", "step": 403, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:16:43.108486", "step": 403, "epoch": 1 }, { "type": "loss", "content": 0.004910886753350496, "timestamp": "2025-09-10 02:16:43.133400", "step": 404, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:16:43.165557", "step": 404, "epoch": 1 }, { "type": "loss", "content": 0.016585027799010277, "timestamp": "2025-09-10 02:16:43.169480", "step": 405, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:16:43.201730", "step": 405, "epoch": 1 }, { "type": "loss", "content": 0.025732260197401047, "timestamp": "2025-09-10 02:16:43.208429", "step": 406, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:16:43.239305", "step": 406, "epoch": 1 }, { "type": "loss", "content": 0.017954887822270393, "timestamp": "2025-09-10 02:16:43.246796", "step": 407, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:16:43.277735", "step": 407, "epoch": 1 }, { "type": "loss", "content": 0.004366433713585138, "timestamp": "2025-09-10 02:16:43.310485", "step": 408, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:16:43.342601", "step": 408, "epoch": 1 }, { "type": "loss", "content": 0.02379104681313038, "timestamp": "2025-09-10 02:16:43.352185", "step": 409, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:16:43.382988", "step": 409, "epoch": 1 }, { "type": "loss", "content": 0.02213932015001774, "timestamp": "2025-09-10 02:16:43.390277", "step": 410, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:16:43.421169", "step": 410, "epoch": 1 }, { "type": "loss", "content": 0.03638289123773575, "timestamp": "2025-09-10 02:16:43.427925", "step": 411, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:16:43.459158", "step": 411, "epoch": 1 }, { "type": "loss", "content": 0.004115985240787268, "timestamp": "2025-09-10 02:16:43.486953", "step": 412, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:16:43.521004", "step": 412, "epoch": 1 }, { "type": "loss", "content": 0.028757499530911446, "timestamp": "2025-09-10 02:16:43.523691", "step": 413, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:16:43.556976", "step": 413, "epoch": 1 }, { "type": "loss", "content": 0.02367253229022026, "timestamp": "2025-09-10 02:16:43.562502", "step": 414, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:16:43.595650", "step": 414, "epoch": 1 }, { "type": "loss", "content": 0.015732292085886, "timestamp": "2025-09-10 02:16:43.601340", "step": 415, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:16:43.641354", "step": 415, "epoch": 1 }, { "type": "loss", "content": 0.007087147329002619, "timestamp": "2025-09-10 02:16:43.670473", "step": 416, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:16:43.710572", "step": 416, "epoch": 1 }, { "type": "loss", "content": 0.017834067344665527, "timestamp": "2025-09-10 02:16:43.716218", "step": 417, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:16:43.755943", "step": 417, "epoch": 1 }, { "type": "loss", "content": 0.01305320393294096, "timestamp": "2025-09-10 02:16:43.762054", "step": 418, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:16:43.799543", "step": 418, "epoch": 1 }, { "type": "loss", "content": 0.0279587022960186, "timestamp": "2025-09-10 02:16:43.808058", "step": 419, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:16:43.841203", "step": 419, "epoch": 1 }, { "type": "loss", "content": 0.04414095729589462, "timestamp": "2025-09-10 02:16:43.871907", "step": 420, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:16:43.902653", "step": 420, "epoch": 1 }, { "type": "loss", "content": 0.037644851952791214, "timestamp": "2025-09-10 02:16:43.910969", "step": 421, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:16:43.940973", "step": 421, "epoch": 1 }, { "type": "loss", "content": 0.019706133753061295, "timestamp": "2025-09-10 02:16:43.951822", "step": 422, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:16:43.983401", "step": 422, "epoch": 1 }, { "type": "loss", "content": 0.011109764687716961, "timestamp": "2025-09-10 02:16:43.993594", "step": 423, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:16:44.024238", "step": 423, "epoch": 1 }, { "type": "loss", "content": 0.01477269921451807, "timestamp": "2025-09-10 02:16:44.052407", "step": 424, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:16:44.082738", "step": 424, "epoch": 1 }, { "type": "loss", "content": 0.005930650979280472, "timestamp": "2025-09-10 02:16:44.087197", "step": 425, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:16:44.116986", "step": 425, "epoch": 1 }, { "type": "loss", "content": 0.011241083033382893, "timestamp": "2025-09-10 02:16:44.124430", "step": 426, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:16:44.154303", "step": 426, "epoch": 1 }, { "type": "loss", "content": 0.01913139782845974, "timestamp": "2025-09-10 02:16:44.158484", "step": 427, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:16:44.189053", "step": 427, "epoch": 1 }, { "type": "loss", "content": 0.013194134458899498, "timestamp": "2025-09-10 02:16:44.220251", "step": 428, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:16:44.250583", "step": 428, "epoch": 1 }, { "type": "loss", "content": 0.004168748389929533, "timestamp": "2025-09-10 02:16:44.259190", "step": 429, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:16:44.290390", "step": 429, "epoch": 1 }, { "type": "loss", "content": 0.004601773340255022, "timestamp": "2025-09-10 02:16:44.300694", "step": 430, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:16:44.331481", "step": 430, "epoch": 1 }, { "type": "loss", "content": 0.011017811484634876, "timestamp": "2025-09-10 02:16:44.343647", "step": 431, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 128 ], "flops": 3797092544000 }, "timestamp": "2025-09-10 02:16:44.385862", "step": 431, "epoch": 1 }, { "type": "loss", "content": 0.014921742491424084, "timestamp": "2025-09-10 02:16:44.409528", "step": 432, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:16:44.440096", "step": 432, "epoch": 1 }, { "type": "loss", "content": 0.03164242208003998, "timestamp": "2025-09-10 02:16:44.444692", "step": 433, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:16:44.475580", "step": 433, "epoch": 1 }, { "type": "loss", "content": 0.028035728260874748, "timestamp": "2025-09-10 02:16:44.482514", "step": 434, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:16:44.513112", "step": 434, "epoch": 1 }, { "type": "loss", "content": 0.02136605978012085, "timestamp": "2025-09-10 02:16:44.520589", "step": 435, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:16:44.551120", "step": 435, "epoch": 1 }, { "type": "loss", "content": 0.007327110972255468, "timestamp": "2025-09-10 02:16:44.579190", "step": 436, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:16:44.609394", "step": 436, "epoch": 1 }, { "type": "loss", "content": 0.010495754890143871, "timestamp": "2025-09-10 02:16:44.619224", "step": 437, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:16:44.652859", "step": 437, "epoch": 1 }, { "type": "loss", "content": 0.048316050320863724, "timestamp": "2025-09-10 02:16:44.666223", "step": 438, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:16:44.697552", "step": 438, "epoch": 1 }, { "type": "loss", "content": 0.01725853607058525, "timestamp": "2025-09-10 02:16:44.704625", "step": 439, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:16:44.735631", "step": 439, "epoch": 1 }, { "type": "loss", "content": 0.018758054822683334, "timestamp": "2025-09-10 02:16:44.764189", "step": 440, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:16:44.795294", "step": 440, "epoch": 1 }, { "type": "loss", "content": 0.007198534905910492, "timestamp": "2025-09-10 02:16:44.800320", "step": 441, "epoch": 1 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 736 ], "batch_size": 8, "flops": 14554433988352 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 480 ], "batch_size": 8, "flops": 9492022189824 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 544 ], "batch_size": 8, "flops": 10757625139456 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 624 ], "batch_size": 8, "flops": 12339628826496 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 576 ], "batch_size": 8, "flops": 11390426614272 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 432 ], "batch_size": 8, "flops": 8542819977600 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 432 ], "batch_size": 8, "flops": 8542819977600 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 128 ], "batch_size": 8, "flops": 2531205966848 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 544 ], "batch_size": 8, "flops": 10757625139456 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 656 ], "batch_size": 8, "flops": 12972430301312 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 128 ], "batch_size": 8, "flops": 2531205966848 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 464 ], "batch_size": 8, "flops": 9175621452416 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 560 ], "batch_size": 8, "flops": 11074025876864 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 576 ], "batch_size": 8, "flops": 11390426614272 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 1168 ], "batch_size": 8, "flops": 23097253898368 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 640 ], "batch_size": 8, "flops": 12656029563904 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 6, 208 ], "batch_size": 8, "flops": 4113209653888 } ], "timestamp": "2025-09-10 02:16:54.824750", "step": 441, "epoch": 1 }, { "type": "pplx", "content": 15501530.366672913, "timestamp": "2025-09-10 02:16:54.827696", "step": 441, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:16:54.858250", "step": 441, "epoch": 1 }, { "type": "loss", "content": 0.01051324512809515, "timestamp": "2025-09-10 02:16:54.862020", "step": 442, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:16:54.895486", "step": 442, "epoch": 1 }, { "type": "loss", "content": 0.01905696466565132, "timestamp": "2025-09-10 02:16:54.899808", "step": 443, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:16:54.931097", "step": 443, "epoch": 1 }, { "type": "loss", "content": 0.013812151737511158, "timestamp": "2025-09-10 02:16:54.956332", "step": 444, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-10 02:16:54.998895", "step": 444, "epoch": 1 }, { "type": "loss", "content": 0.023278802633285522, "timestamp": "2025-09-10 02:16:55.012229", "step": 445, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:16:55.043104", "step": 445, "epoch": 1 }, { "type": "loss", "content": 0.027200039476156235, "timestamp": "2025-09-10 02:16:55.047443", "step": 446, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:16:55.077695", "step": 446, "epoch": 1 }, { "type": "loss", "content": 0.022835474461317062, "timestamp": "2025-09-10 02:16:55.083632", "step": 447, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:16:55.115485", "step": 447, "epoch": 1 }, { "type": "loss", "content": 0.00839492492377758, "timestamp": "2025-09-10 02:16:55.149070", "step": 448, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:16:55.179000", "step": 448, "epoch": 1 }, { "type": "loss", "content": 0.026511041447520256, "timestamp": "2025-09-10 02:16:55.187400", "step": 449, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:16:55.223189", "step": 449, "epoch": 1 }, { "type": "loss", "content": 0.015092065557837486, "timestamp": "2025-09-10 02:16:55.230247", "step": 450, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:16:55.259934", "step": 450, "epoch": 1 }, { "type": "loss", "content": 0.017918048426508904, "timestamp": "2025-09-10 02:16:55.267050", "step": 451, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:16:55.297124", "step": 451, "epoch": 1 }, { "type": "loss", "content": 0.017638269811868668, "timestamp": "2025-09-10 02:16:55.322545", "step": 452, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:16:55.351949", "step": 452, "epoch": 1 }, { "type": "loss", "content": 0.012330381199717522, "timestamp": "2025-09-10 02:16:55.354182", "step": 453, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:16:55.384704", "step": 453, "epoch": 1 }, { "type": "loss", "content": 0.011708649806678295, "timestamp": "2025-09-10 02:16:55.397277", "step": 454, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:16:55.429707", "step": 454, "epoch": 1 }, { "type": "loss", "content": 0.02566845901310444, "timestamp": "2025-09-10 02:16:55.440071", "step": 455, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:16:55.470922", "step": 455, "epoch": 1 }, { "type": "loss", "content": 0.0025598767679184675, "timestamp": "2025-09-10 02:16:55.495870", "step": 456, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:16:55.531353", "step": 456, "epoch": 1 }, { "type": "loss", "content": 0.012102210894227028, "timestamp": "2025-09-10 02:16:55.537883", "step": 457, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:16:55.569169", "step": 457, "epoch": 1 }, { "type": "loss", "content": 0.009209878742694855, "timestamp": "2025-09-10 02:16:55.573634", "step": 458, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:16:55.607979", "step": 458, "epoch": 1 }, { "type": "loss", "content": 0.009584350511431694, "timestamp": "2025-09-10 02:16:55.615763", "step": 459, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:16:55.651621", "step": 459, "epoch": 1 }, { "type": "loss", "content": 0.016596131026744843, "timestamp": "2025-09-10 02:16:55.679599", "step": 460, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:16:55.717580", "step": 460, "epoch": 1 }, { "type": "loss", "content": 0.02308899164199829, "timestamp": "2025-09-10 02:16:55.719799", "step": 461, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:16:55.749684", "step": 461, "epoch": 1 }, { "type": "loss", "content": 0.016699977219104767, "timestamp": "2025-09-10 02:16:55.754325", "step": 462, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:16:55.787359", "step": 462, "epoch": 1 }, { "type": "loss", "content": 0.02096674218773842, "timestamp": "2025-09-10 02:16:55.800723", "step": 463, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:16:55.831583", "step": 463, "epoch": 1 }, { "type": "loss", "content": 0.0141488928347826, "timestamp": "2025-09-10 02:16:55.864678", "step": 464, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:16:55.899549", "step": 464, "epoch": 1 }, { "type": "loss", "content": 0.02313445508480072, "timestamp": "2025-09-10 02:16:55.904295", "step": 465, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:16:55.940946", "step": 465, "epoch": 1 }, { "type": "loss", "content": 0.005347964819520712, "timestamp": "2025-09-10 02:16:55.947951", "step": 466, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:16:55.982414", "step": 466, "epoch": 1 }, { "type": "loss", "content": 0.019470447674393654, "timestamp": "2025-09-10 02:16:55.989966", "step": 467, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:16:56.020181", "step": 467, "epoch": 1 }, { "type": "loss", "content": 0.01922597922384739, "timestamp": "2025-09-10 02:16:56.052020", "step": 468, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:16:56.082140", "step": 468, "epoch": 1 }, { "type": "loss", "content": 0.02592761255800724, "timestamp": "2025-09-10 02:16:56.089958", "step": 469, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:16:56.120311", "step": 469, "epoch": 1 }, { "type": "loss", "content": 0.022186074405908585, "timestamp": "2025-09-10 02:16:56.127526", "step": 470, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:16:56.158541", "step": 470, "epoch": 1 }, { "type": "loss", "content": 0.022316042333841324, "timestamp": "2025-09-10 02:16:56.168674", "step": 471, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 144 ], "flops": 4271696270016 }, "timestamp": "2025-09-10 02:16:56.198880", "step": 471, "epoch": 1 }, { "type": "loss", "content": 0.007065699901431799, "timestamp": "2025-09-10 02:16:56.222178", "step": 472, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:16:56.259536", "step": 472, "epoch": 1 }, { "type": "loss", "content": 0.016231011599302292, "timestamp": "2025-09-10 02:16:56.264118", "step": 473, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:16:56.294749", "step": 473, "epoch": 1 }, { "type": "loss", "content": 0.024839241057634354, "timestamp": "2025-09-10 02:16:56.302103", "step": 474, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:16:56.332452", "step": 474, "epoch": 1 }, { "type": "loss", "content": 0.04020370915532112, "timestamp": "2025-09-10 02:16:56.336544", "step": 475, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:16:56.368297", "step": 475, "epoch": 1 }, { "type": "loss", "content": 0.017098741605877876, "timestamp": "2025-09-10 02:16:56.396144", "step": 476, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:16:56.429707", "step": 476, "epoch": 1 }, { "type": "loss", "content": 0.014849187806248665, "timestamp": "2025-09-10 02:16:56.436676", "step": 477, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:16:56.475817", "step": 477, "epoch": 1 }, { "type": "loss", "content": 0.019702225923538208, "timestamp": "2025-09-10 02:16:56.482832", "step": 478, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:16:56.514808", "step": 478, "epoch": 1 }, { "type": "loss", "content": 0.027971146628260612, "timestamp": "2025-09-10 02:16:56.524882", "step": 479, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:16:56.556254", "step": 479, "epoch": 1 }, { "type": "loss", "content": 0.00662533612921834, "timestamp": "2025-09-10 02:16:56.581638", "step": 480, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:16:56.612469", "step": 480, "epoch": 1 }, { "type": "loss", "content": 0.017041940242052078, "timestamp": "2025-09-10 02:16:56.618341", "step": 481, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:16:56.650763", "step": 481, "epoch": 1 }, { "type": "loss", "content": 0.0072524151764810085, "timestamp": "2025-09-10 02:16:56.658474", "step": 482, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:16:56.691076", "step": 482, "epoch": 1 }, { "type": "loss", "content": 0.01302304957062006, "timestamp": "2025-09-10 02:16:56.698078", "step": 483, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:16:56.733662", "step": 483, "epoch": 1 }, { "type": "loss", "content": 0.008182940073311329, "timestamp": "2025-09-10 02:16:56.764429", "step": 484, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:16:56.795179", "step": 484, "epoch": 1 }, { "type": "loss", "content": 0.02642636187374592, "timestamp": "2025-09-10 02:16:56.797355", "step": 485, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-10 02:16:56.836091", "step": 485, "epoch": 1 }, { "type": "loss", "content": 0.025703372433781624, "timestamp": "2025-09-10 02:16:56.851817", "step": 486, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:16:56.882364", "step": 486, "epoch": 1 }, { "type": "loss", "content": 0.006586894392967224, "timestamp": "2025-09-10 02:16:56.889256", "step": 487, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:16:56.920015", "step": 487, "epoch": 1 }, { "type": "loss", "content": 0.03618357703089714, "timestamp": "2025-09-10 02:16:56.947737", "step": 488, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:16:56.980013", "step": 488, "epoch": 1 }, { "type": "loss", "content": 0.02346952259540558, "timestamp": "2025-09-10 02:16:56.985354", "step": 489, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 432 ], "flops": 12814563338304 }, "timestamp": "2025-09-10 02:16:57.031808", "step": 489, "epoch": 1 }, { "type": "loss", "content": 0.02613210491836071, "timestamp": "2025-09-10 02:16:57.045157", "step": 490, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:16:57.074931", "step": 490, "epoch": 1 }, { "type": "loss", "content": 0.013658554293215275, "timestamp": "2025-09-10 02:16:57.079421", "step": 491, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:16:57.113555", "step": 491, "epoch": 1 }, { "type": "loss", "content": 0.023100513964891434, "timestamp": "2025-09-10 02:16:57.148219", "step": 492, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:16:57.179933", "step": 492, "epoch": 1 }, { "type": "loss", "content": 0.010461096651852131, "timestamp": "2025-09-10 02:16:57.185393", "step": 493, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:16:57.216369", "step": 493, "epoch": 1 }, { "type": "loss", "content": 0.01992633379995823, "timestamp": "2025-09-10 02:16:57.220455", "step": 494, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:16:57.251240", "step": 494, "epoch": 1 }, { "type": "loss", "content": 0.008644700050354004, "timestamp": "2025-09-10 02:16:57.257576", "step": 495, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:16:57.287936", "step": 495, "epoch": 1 }, { "type": "loss", "content": 0.018692122772336006, "timestamp": "2025-09-10 02:16:57.313229", "step": 496, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 144 ], "flops": 4271696270016 }, "timestamp": "2025-09-10 02:16:57.343998", "step": 496, "epoch": 1 }, { "type": "loss", "content": 0.010719933547079563, "timestamp": "2025-09-10 02:16:57.346013", "step": 497, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-10 02:16:57.385187", "step": 497, "epoch": 1 }, { "type": "loss", "content": 0.014836416579782963, "timestamp": "2025-09-10 02:16:57.401132", "step": 498, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 128 ], "flops": 3797092544000 }, "timestamp": "2025-09-10 02:16:57.439967", "step": 498, "epoch": 1 }, { "type": "loss", "content": 0.009858435951173306, "timestamp": "2025-09-10 02:16:57.444529", "step": 499, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:16:57.484973", "step": 499, "epoch": 1 }, { "type": "loss", "content": 0.014942965470254421, "timestamp": "2025-09-10 02:16:57.510192", "step": 500, "epoch": 1 }, { "type": "info", "content": "Checkpoint saved at step 500", "timestamp": "2025-09-10 02:17:02.717209", "step": 500, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:17:02.750251", "step": 500, "epoch": 1 }, { "type": "loss", "content": 0.010459995828568935, "timestamp": "2025-09-10 02:17:02.754425", "step": 501, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:17:02.786235", "step": 501, "epoch": 1 }, { "type": "loss", "content": 0.00723966583609581, "timestamp": "2025-09-10 02:17:02.795255", "step": 502, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:17:02.832596", "step": 502, "epoch": 1 }, { "type": "loss", "content": 0.023343030363321304, "timestamp": "2025-09-10 02:17:02.839413", "step": 503, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:17:02.871628", "step": 503, "epoch": 1 }, { "type": "loss", "content": 0.01857823319733143, "timestamp": "2025-09-10 02:17:02.902391", "step": 504, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-10 02:17:02.935396", "step": 504, "epoch": 1 }, { "type": "loss", "content": 0.017975622788071632, "timestamp": "2025-09-10 02:17:02.948532", "step": 505, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:17:02.994342", "step": 505, "epoch": 1 }, { "type": "loss", "content": 0.029632670804858208, "timestamp": "2025-09-10 02:17:03.002054", "step": 506, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:17:03.042887", "step": 506, "epoch": 1 }, { "type": "loss", "content": 0.030040541663765907, "timestamp": "2025-09-10 02:17:03.050694", "step": 507, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:17:03.081641", "step": 507, "epoch": 1 }, { "type": "loss", "content": 0.010655703954398632, "timestamp": "2025-09-10 02:17:03.110342", "step": 508, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:17:03.141107", "step": 508, "epoch": 1 }, { "type": "loss", "content": 0.010932376608252525, "timestamp": "2025-09-10 02:17:03.150935", "step": 509, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:17:03.189097", "step": 509, "epoch": 1 }, { "type": "loss", "content": 0.016881374642252922, "timestamp": "2025-09-10 02:17:03.201651", "step": 510, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-10 02:17:03.238792", "step": 510, "epoch": 1 }, { "type": "loss", "content": 0.011083531193435192, "timestamp": "2025-09-10 02:17:03.252584", "step": 511, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:17:03.288534", "step": 511, "epoch": 1 }, { "type": "loss", "content": 0.023147176951169968, "timestamp": "2025-09-10 02:17:03.319590", "step": 512, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:17:03.365430", "step": 512, "epoch": 1 }, { "type": "loss", "content": 0.03139190003275871, "timestamp": "2025-09-10 02:17:03.370617", "step": 513, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-10 02:17:03.420989", "step": 513, "epoch": 1 }, { "type": "loss", "content": 0.02452153153717518, "timestamp": "2025-09-10 02:17:03.436882", "step": 514, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:17:03.469459", "step": 514, "epoch": 1 }, { "type": "loss", "content": 0.012951391749083996, "timestamp": "2025-09-10 02:17:03.476526", "step": 515, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:17:03.523716", "step": 515, "epoch": 1 }, { "type": "loss", "content": 0.014777913689613342, "timestamp": "2025-09-10 02:17:03.558303", "step": 516, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:17:03.591745", "step": 516, "epoch": 1 }, { "type": "loss", "content": 0.03574973717331886, "timestamp": "2025-09-10 02:17:03.604408", "step": 517, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:17:03.634689", "step": 517, "epoch": 1 }, { "type": "loss", "content": 0.009790212847292423, "timestamp": "2025-09-10 02:17:03.641426", "step": 518, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:17:03.676660", "step": 518, "epoch": 1 }, { "type": "loss", "content": 0.02090480551123619, "timestamp": "2025-09-10 02:17:03.688328", "step": 519, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:17:03.725888", "step": 519, "epoch": 1 }, { "type": "loss", "content": 0.0349605493247509, "timestamp": "2025-09-10 02:17:03.750814", "step": 520, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:17:03.784064", "step": 520, "epoch": 1 }, { "type": "loss", "content": 0.010910294018685818, "timestamp": "2025-09-10 02:17:03.789203", "step": 521, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:17:03.821200", "step": 521, "epoch": 1 }, { "type": "loss", "content": 0.013309179805219173, "timestamp": "2025-09-10 02:17:03.825439", "step": 522, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:17:03.856521", "step": 522, "epoch": 1 }, { "type": "loss", "content": 0.028638780117034912, "timestamp": "2025-09-10 02:17:03.868386", "step": 523, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:17:03.900181", "step": 523, "epoch": 1 }, { "type": "loss", "content": 0.009006711654365063, "timestamp": "2025-09-10 02:17:03.928643", "step": 524, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:17:03.958522", "step": 524, "epoch": 1 }, { "type": "loss", "content": 0.02321997843682766, "timestamp": "2025-09-10 02:17:03.963456", "step": 525, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:17:03.995505", "step": 525, "epoch": 1 }, { "type": "loss", "content": 0.02315063215792179, "timestamp": "2025-09-10 02:17:04.007499", "step": 526, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:17:04.053767", "step": 526, "epoch": 1 }, { "type": "loss", "content": 0.015552300028502941, "timestamp": "2025-09-10 02:17:04.060843", "step": 527, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:17:04.092583", "step": 527, "epoch": 1 }, { "type": "loss", "content": 0.029168089851737022, "timestamp": "2025-09-10 02:17:04.123678", "step": 528, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:17:04.157849", "step": 528, "epoch": 1 }, { "type": "loss", "content": 0.003137963591143489, "timestamp": "2025-09-10 02:17:04.163087", "step": 529, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:17:04.196961", "step": 529, "epoch": 1 }, { "type": "loss", "content": 0.03553525730967522, "timestamp": "2025-09-10 02:17:04.204759", "step": 530, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:17:04.239239", "step": 530, "epoch": 1 }, { "type": "loss", "content": 0.022633006796240807, "timestamp": "2025-09-10 02:17:04.251415", "step": 531, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:17:04.282407", "step": 531, "epoch": 1 }, { "type": "loss", "content": 0.016931835561990738, "timestamp": "2025-09-10 02:17:04.311018", "step": 532, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 480 ], "flops": 14238374516352 }, "timestamp": "2025-09-10 02:17:04.351459", "step": 532, "epoch": 1 }, { "type": "loss", "content": 0.010154195129871368, "timestamp": "2025-09-10 02:17:04.368471", "step": 533, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:17:04.402207", "step": 533, "epoch": 1 }, { "type": "loss", "content": 0.035087209194898605, "timestamp": "2025-09-10 02:17:04.409146", "step": 534, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:17:04.439763", "step": 534, "epoch": 1 }, { "type": "loss", "content": 0.013602891936898232, "timestamp": "2025-09-10 02:17:04.444187", "step": 535, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 688 ], "flops": 20408222954560 }, "timestamp": "2025-09-10 02:17:04.512971", "step": 535, "epoch": 1 }, { "type": "loss", "content": 0.010147054679691792, "timestamp": "2025-09-10 02:17:04.557938", "step": 536, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:17:04.588485", "step": 536, "epoch": 1 }, { "type": "loss", "content": 0.009458757936954498, "timestamp": "2025-09-10 02:17:04.598718", "step": 537, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:17:04.632882", "step": 537, "epoch": 1 }, { "type": "loss", "content": 0.01887761428952217, "timestamp": "2025-09-10 02:17:04.640519", "step": 538, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:17:04.672166", "step": 538, "epoch": 1 }, { "type": "loss", "content": 0.013571500778198242, "timestamp": "2025-09-10 02:17:04.679851", "step": 539, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:17:04.710094", "step": 539, "epoch": 1 }, { "type": "loss", "content": 0.018864035606384277, "timestamp": "2025-09-10 02:17:04.734715", "step": 540, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:17:04.766172", "step": 540, "epoch": 1 }, { "type": "loss", "content": 0.014328965917229652, "timestamp": "2025-09-10 02:17:04.775922", "step": 541, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:17:04.807076", "step": 541, "epoch": 1 }, { "type": "loss", "content": 0.007837352342903614, "timestamp": "2025-09-10 02:17:04.814804", "step": 542, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:17:04.848106", "step": 542, "epoch": 1 }, { "type": "loss", "content": 0.020064374431967735, "timestamp": "2025-09-10 02:17:04.855243", "step": 543, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-10 02:17:04.895354", "step": 543, "epoch": 1 }, { "type": "loss", "content": 0.027344727888703346, "timestamp": "2025-09-10 02:17:04.931857", "step": 544, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:17:04.962318", "step": 544, "epoch": 1 }, { "type": "loss", "content": 0.035422343760728836, "timestamp": "2025-09-10 02:17:04.969466", "step": 545, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 560 ], "flops": 16611393146432 }, "timestamp": "2025-09-10 02:17:05.026681", "step": 545, "epoch": 1 }, { "type": "loss", "content": 0.009574404917657375, "timestamp": "2025-09-10 02:17:05.046057", "step": 546, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:17:05.083897", "step": 546, "epoch": 1 }, { "type": "loss", "content": 0.012771585024893284, "timestamp": "2025-09-10 02:17:05.090641", "step": 547, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:17:05.123234", "step": 547, "epoch": 1 }, { "type": "loss", "content": 0.007596482522785664, "timestamp": "2025-09-10 02:17:05.154316", "step": 548, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:17:05.185184", "step": 548, "epoch": 1 }, { "type": "loss", "content": 0.029550552368164062, "timestamp": "2025-09-10 02:17:05.189804", "step": 549, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:17:05.221572", "step": 549, "epoch": 1 }, { "type": "loss", "content": 0.005684220232069492, "timestamp": "2025-09-10 02:17:05.226030", "step": 550, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:17:05.259344", "step": 550, "epoch": 1 }, { "type": "loss", "content": 0.020296234637498856, "timestamp": "2025-09-10 02:17:05.271859", "step": 551, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:17:05.306189", "step": 551, "epoch": 1 }, { "type": "loss", "content": 0.011387856677174568, "timestamp": "2025-09-10 02:17:05.339356", "step": 552, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:17:05.370234", "step": 552, "epoch": 1 }, { "type": "loss", "content": 0.005133692175149918, "timestamp": "2025-09-10 02:17:05.374915", "step": 553, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:17:05.405630", "step": 553, "epoch": 1 }, { "type": "loss", "content": 0.015191650949418545, "timestamp": "2025-09-10 02:17:05.417754", "step": 554, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:17:05.449245", "step": 554, "epoch": 1 }, { "type": "loss", "content": 0.01479465514421463, "timestamp": "2025-09-10 02:17:05.456377", "step": 555, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:17:05.492055", "step": 555, "epoch": 1 }, { "type": "loss", "content": 0.00847632810473442, "timestamp": "2025-09-10 02:17:05.517533", "step": 556, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:17:05.558071", "step": 556, "epoch": 1 }, { "type": "loss", "content": 0.020174086093902588, "timestamp": "2025-09-10 02:17:05.560370", "step": 557, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-10 02:17:05.603236", "step": 557, "epoch": 1 }, { "type": "loss", "content": 0.008243663236498833, "timestamp": "2025-09-10 02:17:05.617199", "step": 558, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:17:05.650878", "step": 558, "epoch": 1 }, { "type": "loss", "content": 0.014073808677494526, "timestamp": "2025-09-10 02:17:05.655400", "step": 559, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:17:05.685836", "step": 559, "epoch": 1 }, { "type": "loss", "content": 0.012161211110651493, "timestamp": "2025-09-10 02:17:05.711317", "step": 560, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:17:05.744726", "step": 560, "epoch": 1 }, { "type": "loss", "content": 0.0253736712038517, "timestamp": "2025-09-10 02:17:05.753532", "step": 561, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:17:05.787108", "step": 561, "epoch": 1 }, { "type": "loss", "content": 0.00579653587192297, "timestamp": "2025-09-10 02:17:05.799398", "step": 562, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-10 02:17:05.844748", "step": 562, "epoch": 1 }, { "type": "loss", "content": 0.0027781727258116007, "timestamp": "2025-09-10 02:17:05.858553", "step": 563, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:17:05.894663", "step": 563, "epoch": 1 }, { "type": "loss", "content": 0.035328906029462814, "timestamp": "2025-09-10 02:17:05.924916", "step": 564, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:17:05.962930", "step": 564, "epoch": 1 }, { "type": "loss", "content": 0.00891299732029438, "timestamp": "2025-09-10 02:17:05.965304", "step": 565, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:17:05.998336", "step": 565, "epoch": 1 }, { "type": "loss", "content": 0.013710664585232735, "timestamp": "2025-09-10 02:17:06.008845", "step": 566, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:17:06.039916", "step": 566, "epoch": 1 }, { "type": "loss", "content": 0.00687979394569993, "timestamp": "2025-09-10 02:17:06.046706", "step": 567, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:17:06.077841", "step": 567, "epoch": 1 }, { "type": "loss", "content": 0.009851823560893536, "timestamp": "2025-09-10 02:17:06.106375", "step": 568, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:17:06.137339", "step": 568, "epoch": 1 }, { "type": "loss", "content": 0.012755611911416054, "timestamp": "2025-09-10 02:17:06.143314", "step": 569, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:17:06.174764", "step": 569, "epoch": 1 }, { "type": "loss", "content": 0.005232820753008127, "timestamp": "2025-09-10 02:17:06.185075", "step": 570, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:17:06.216416", "step": 570, "epoch": 1 }, { "type": "loss", "content": 0.025471851229667664, "timestamp": "2025-09-10 02:17:06.220670", "step": 571, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:17:06.261098", "step": 571, "epoch": 1 }, { "type": "loss", "content": 0.016110900789499283, "timestamp": "2025-09-10 02:17:06.289501", "step": 572, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 464 ], "flops": 13763770790336 }, "timestamp": "2025-09-10 02:17:06.336757", "step": 572, "epoch": 1 }, { "type": "loss", "content": 0.018473317846655846, "timestamp": "2025-09-10 02:17:06.353491", "step": 573, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:17:06.385507", "step": 573, "epoch": 1 }, { "type": "loss", "content": 0.014140649698674679, "timestamp": "2025-09-10 02:17:06.398092", "step": 574, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:17:06.429189", "step": 574, "epoch": 1 }, { "type": "loss", "content": 0.011097794398665428, "timestamp": "2025-09-10 02:17:06.435977", "step": 575, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:17:06.466194", "step": 575, "epoch": 1 }, { "type": "loss", "content": 0.01416011806577444, "timestamp": "2025-09-10 02:17:06.493932", "step": 576, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:17:06.545475", "step": 576, "epoch": 1 }, { "type": "loss", "content": 0.014164241962134838, "timestamp": "2025-09-10 02:17:06.553210", "step": 577, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:17:06.591569", "step": 577, "epoch": 1 }, { "type": "loss", "content": 0.04556810483336449, "timestamp": "2025-09-10 02:17:06.604998", "step": 578, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:17:06.636536", "step": 578, "epoch": 1 }, { "type": "loss", "content": 0.010820058174431324, "timestamp": "2025-09-10 02:17:06.639940", "step": 579, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:17:06.673852", "step": 579, "epoch": 1 }, { "type": "loss", "content": 0.0034028550144284964, "timestamp": "2025-09-10 02:17:06.704601", "step": 580, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:17:06.736412", "step": 580, "epoch": 1 }, { "type": "loss", "content": 0.016602005809545517, "timestamp": "2025-09-10 02:17:06.741634", "step": 581, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:17:06.771605", "step": 581, "epoch": 1 }, { "type": "loss", "content": 0.05845152586698532, "timestamp": "2025-09-10 02:17:06.776491", "step": 582, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:17:06.808863", "step": 582, "epoch": 1 }, { "type": "loss", "content": 0.03493461757898331, "timestamp": "2025-09-10 02:17:06.814928", "step": 583, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:17:06.846547", "step": 583, "epoch": 1 }, { "type": "loss", "content": 0.021650012582540512, "timestamp": "2025-09-10 02:17:06.879990", "step": 584, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:17:06.913612", "step": 584, "epoch": 1 }, { "type": "loss", "content": 0.0019279540283605456, "timestamp": "2025-09-10 02:17:06.918165", "step": 585, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:17:06.949617", "step": 585, "epoch": 1 }, { "type": "loss", "content": 0.01664150133728981, "timestamp": "2025-09-10 02:17:06.960220", "step": 586, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:17:06.992181", "step": 586, "epoch": 1 }, { "type": "loss", "content": 0.02184317074716091, "timestamp": "2025-09-10 02:17:06.998923", "step": 587, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:17:07.043515", "step": 587, "epoch": 1 }, { "type": "loss", "content": 0.004326535388827324, "timestamp": "2025-09-10 02:17:07.071694", "step": 588, "epoch": 1 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 736 ], "batch_size": 8, "flops": 14554433988352 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 480 ], "batch_size": 8, "flops": 9492022189824 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 544 ], "batch_size": 8, "flops": 10757625139456 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 624 ], "batch_size": 8, "flops": 12339628826496 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 576 ], "batch_size": 8, "flops": 11390426614272 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 432 ], "batch_size": 8, "flops": 8542819977600 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 432 ], "batch_size": 8, "flops": 8542819977600 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 128 ], "batch_size": 8, "flops": 2531205966848 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 544 ], "batch_size": 8, "flops": 10757625139456 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 656 ], "batch_size": 8, "flops": 12972430301312 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 128 ], "batch_size": 8, "flops": 2531205966848 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 464 ], "batch_size": 8, "flops": 9175621452416 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 560 ], "batch_size": 8, "flops": 11074025876864 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 576 ], "batch_size": 8, "flops": 11390426614272 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 1168 ], "batch_size": 8, "flops": 23097253898368 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 640 ], "batch_size": 8, "flops": 12656029563904 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 6, 208 ], "batch_size": 8, "flops": 4113209653888 } ], "timestamp": "2025-09-10 02:17:17.297865", "step": 588, "epoch": 1 }, { "type": "pplx", "content": 17933771.412629146, "timestamp": "2025-09-10 02:17:17.300864", "step": 588, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 816 ], "flops": 24205052762688 }, "timestamp": "2025-09-10 02:17:17.369599", "step": 588, "epoch": 1 }, { "type": "loss", "content": 0.009856624528765678, "timestamp": "2025-09-10 02:17:17.397874", "step": 589, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:17:17.433590", "step": 589, "epoch": 1 }, { "type": "loss", "content": 0.017127353698015213, "timestamp": "2025-09-10 02:17:17.440647", "step": 590, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:17:17.472940", "step": 590, "epoch": 1 }, { "type": "loss", "content": 0.011390717700123787, "timestamp": "2025-09-10 02:17:17.482793", "step": 591, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:17:17.514056", "step": 591, "epoch": 1 }, { "type": "loss", "content": 0.014447472058236599, "timestamp": "2025-09-10 02:17:17.546534", "step": 592, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:17:17.579394", "step": 592, "epoch": 1 }, { "type": "loss", "content": 0.003005419624969363, "timestamp": "2025-09-10 02:17:17.583121", "step": 593, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:17:17.615124", "step": 593, "epoch": 1 }, { "type": "loss", "content": 0.00944295059889555, "timestamp": "2025-09-10 02:17:17.622720", "step": 594, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:17:17.660679", "step": 594, "epoch": 1 }, { "type": "loss", "content": 0.009788398630917072, "timestamp": "2025-09-10 02:17:17.665069", "step": 595, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-10 02:17:17.701220", "step": 595, "epoch": 1 }, { "type": "loss", "content": 0.012333549559116364, "timestamp": "2025-09-10 02:17:17.736145", "step": 596, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:17:17.769079", "step": 596, "epoch": 1 }, { "type": "loss", "content": 0.01467831339687109, "timestamp": "2025-09-10 02:17:17.773289", "step": 597, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:17:17.807430", "step": 597, "epoch": 1 }, { "type": "loss", "content": 0.03252703696489334, "timestamp": "2025-09-10 02:17:17.814593", "step": 598, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:17:17.849955", "step": 598, "epoch": 1 }, { "type": "loss", "content": 0.012095707468688488, "timestamp": "2025-09-10 02:17:17.857669", "step": 599, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:17:17.890183", "step": 599, "epoch": 1 }, { "type": "loss", "content": 0.03608888015151024, "timestamp": "2025-09-10 02:17:17.918339", "step": 600, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:17:17.951914", "step": 600, "epoch": 1 }, { "type": "loss", "content": 0.0019412686815485358, "timestamp": "2025-09-10 02:17:17.956404", "step": 601, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:17:17.986749", "step": 601, "epoch": 1 }, { "type": "loss", "content": 0.015688760206103325, "timestamp": "2025-09-10 02:17:17.993894", "step": 602, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:17:18.026557", "step": 602, "epoch": 1 }, { "type": "loss", "content": 0.013548861257731915, "timestamp": "2025-09-10 02:17:18.038440", "step": 603, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:17:18.070533", "step": 603, "epoch": 1 }, { "type": "loss", "content": 0.045547544956207275, "timestamp": "2025-09-10 02:17:18.103997", "step": 604, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-10 02:17:18.147664", "step": 604, "epoch": 1 }, { "type": "loss", "content": 0.029206562787294388, "timestamp": "2025-09-10 02:17:18.160822", "step": 605, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:17:18.200358", "step": 605, "epoch": 1 }, { "type": "loss", "content": 0.0025844343472272158, "timestamp": "2025-09-10 02:17:18.207248", "step": 606, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:17:18.239358", "step": 606, "epoch": 1 }, { "type": "loss", "content": 0.008630426600575447, "timestamp": "2025-09-10 02:17:18.249893", "step": 607, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:17:18.283006", "step": 607, "epoch": 1 }, { "type": "loss", "content": 0.0020521271508187056, "timestamp": "2025-09-10 02:17:18.308137", "step": 608, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:17:18.341145", "step": 608, "epoch": 1 }, { "type": "loss", "content": 0.0035357114393264055, "timestamp": "2025-09-10 02:17:18.345327", "step": 609, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:17:18.377034", "step": 609, "epoch": 1 }, { "type": "loss", "content": 0.006719955708831549, "timestamp": "2025-09-10 02:17:18.384473", "step": 610, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:17:18.417171", "step": 610, "epoch": 1 }, { "type": "loss", "content": 0.004843573085963726, "timestamp": "2025-09-10 02:17:18.421468", "step": 611, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:17:18.454741", "step": 611, "epoch": 1 }, { "type": "loss", "content": 0.023189399391412735, "timestamp": "2025-09-10 02:17:18.485520", "step": 612, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:17:18.521681", "step": 612, "epoch": 1 }, { "type": "loss", "content": 0.005349505692720413, "timestamp": "2025-09-10 02:17:18.534378", "step": 613, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:17:18.566683", "step": 613, "epoch": 1 }, { "type": "loss", "content": 0.0026773291174322367, "timestamp": "2025-09-10 02:17:18.578531", "step": 614, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:17:18.610775", "step": 614, "epoch": 1 }, { "type": "loss", "content": 0.02543543465435505, "timestamp": "2025-09-10 02:17:18.614897", "step": 615, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:17:18.650542", "step": 615, "epoch": 1 }, { "type": "loss", "content": 0.006155446171760559, "timestamp": "2025-09-10 02:17:18.685120", "step": 616, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:17:18.718426", "step": 616, "epoch": 1 }, { "type": "loss", "content": 0.012057982385158539, "timestamp": "2025-09-10 02:17:18.723346", "step": 617, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:17:18.757126", "step": 617, "epoch": 1 }, { "type": "loss", "content": 0.0031552365981042385, "timestamp": "2025-09-10 02:17:18.764412", "step": 618, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:17:18.796501", "step": 618, "epoch": 1 }, { "type": "loss", "content": 0.024341052398085594, "timestamp": "2025-09-10 02:17:18.804106", "step": 619, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:17:18.839158", "step": 619, "epoch": 1 }, { "type": "loss", "content": 0.0073067969642579556, "timestamp": "2025-09-10 02:17:18.869850", "step": 620, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:17:18.902496", "step": 620, "epoch": 1 }, { "type": "loss", "content": 0.018821122124791145, "timestamp": "2025-09-10 02:17:18.910508", "step": 621, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:17:18.943708", "step": 621, "epoch": 1 }, { "type": "loss", "content": 0.0015981352189555764, "timestamp": "2025-09-10 02:17:18.947909", "step": 622, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:17:18.981778", "step": 622, "epoch": 1 }, { "type": "loss", "content": 0.010449434630572796, "timestamp": "2025-09-10 02:17:18.989279", "step": 623, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 128 ], "flops": 3797092544000 }, "timestamp": "2025-09-10 02:17:19.021145", "step": 623, "epoch": 1 }, { "type": "loss", "content": 0.006673470605164766, "timestamp": "2025-09-10 02:17:19.046217", "step": 624, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:17:19.077682", "step": 624, "epoch": 1 }, { "type": "loss", "content": 0.027125883847475052, "timestamp": "2025-09-10 02:17:19.081932", "step": 625, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:17:19.111910", "step": 625, "epoch": 1 }, { "type": "loss", "content": 0.00828898511826992, "timestamp": "2025-09-10 02:17:19.115841", "step": 626, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:17:19.148853", "step": 626, "epoch": 1 }, { "type": "loss", "content": 0.0038712245877832174, "timestamp": "2025-09-10 02:17:19.156549", "step": 627, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:17:19.188531", "step": 627, "epoch": 1 }, { "type": "loss", "content": 0.012664406560361385, "timestamp": "2025-09-10 02:17:19.219467", "step": 628, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:17:19.251808", "step": 628, "epoch": 1 }, { "type": "loss", "content": 0.03356235474348068, "timestamp": "2025-09-10 02:17:19.256337", "step": 629, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:17:19.288914", "step": 629, "epoch": 1 }, { "type": "loss", "content": 0.027829742059111595, "timestamp": "2025-09-10 02:17:19.300893", "step": 630, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:17:19.334244", "step": 630, "epoch": 1 }, { "type": "loss", "content": 0.009094549342989922, "timestamp": "2025-09-10 02:17:19.341336", "step": 631, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:17:19.373190", "step": 631, "epoch": 1 }, { "type": "loss", "content": 0.0018705466063693166, "timestamp": "2025-09-10 02:17:19.401577", "step": 632, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:17:19.433749", "step": 632, "epoch": 1 }, { "type": "loss", "content": 0.017098234966397285, "timestamp": "2025-09-10 02:17:19.438572", "step": 633, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:17:19.472952", "step": 633, "epoch": 1 }, { "type": "loss", "content": 0.052354682236909866, "timestamp": "2025-09-10 02:17:19.480464", "step": 634, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:17:19.513654", "step": 634, "epoch": 1 }, { "type": "loss", "content": 0.055087942630052567, "timestamp": "2025-09-10 02:17:19.520295", "step": 635, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:17:19.553748", "step": 635, "epoch": 1 }, { "type": "loss", "content": 0.005113348830491304, "timestamp": "2025-09-10 02:17:19.586415", "step": 636, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:17:19.618032", "step": 636, "epoch": 1 }, { "type": "loss", "content": 0.05106746777892113, "timestamp": "2025-09-10 02:17:19.622994", "step": 637, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:17:19.660180", "step": 637, "epoch": 1 }, { "type": "loss", "content": 0.0263382438570261, "timestamp": "2025-09-10 02:17:19.662427", "step": 638, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:17:19.693860", "step": 638, "epoch": 1 }, { "type": "loss", "content": 0.007414556574076414, "timestamp": "2025-09-10 02:17:19.700726", "step": 639, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:17:19.733055", "step": 639, "epoch": 1 }, { "type": "loss", "content": 0.001413815887644887, "timestamp": "2025-09-10 02:17:19.766419", "step": 640, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:17:19.798671", "step": 640, "epoch": 1 }, { "type": "loss", "content": 0.021783774718642235, "timestamp": "2025-09-10 02:17:19.808756", "step": 641, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:17:19.840085", "step": 641, "epoch": 1 }, { "type": "loss", "content": 0.027584636583924294, "timestamp": "2025-09-10 02:17:19.846841", "step": 642, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:17:19.879816", "step": 642, "epoch": 1 }, { "type": "loss", "content": 0.03216005116701126, "timestamp": "2025-09-10 02:17:19.887161", "step": 643, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:17:19.919658", "step": 643, "epoch": 1 }, { "type": "loss", "content": 0.036754488945007324, "timestamp": "2025-09-10 02:17:19.951670", "step": 644, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:17:19.984226", "step": 644, "epoch": 1 }, { "type": "loss", "content": 0.009167312644422054, "timestamp": "2025-09-10 02:17:19.986908", "step": 645, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:17:20.020646", "step": 645, "epoch": 1 }, { "type": "loss", "content": 0.015368753112852573, "timestamp": "2025-09-10 02:17:20.027113", "step": 646, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:17:20.067572", "step": 646, "epoch": 1 }, { "type": "loss", "content": 0.025697126984596252, "timestamp": "2025-09-10 02:17:20.071649", "step": 647, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:17:20.104098", "step": 647, "epoch": 1 }, { "type": "loss", "content": 0.0037650710437446833, "timestamp": "2025-09-10 02:17:20.132598", "step": 648, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:17:20.164187", "step": 648, "epoch": 1 }, { "type": "loss", "content": 0.010932421311736107, "timestamp": "2025-09-10 02:17:20.168755", "step": 649, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-10 02:17:20.206320", "step": 649, "epoch": 1 }, { "type": "loss", "content": 0.022069621831178665, "timestamp": "2025-09-10 02:17:20.221952", "step": 650, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:17:20.253942", "step": 650, "epoch": 1 }, { "type": "loss", "content": 0.0029460687655955553, "timestamp": "2025-09-10 02:17:20.261051", "step": 651, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:17:20.291356", "step": 651, "epoch": 1 }, { "type": "loss", "content": 0.01202553603798151, "timestamp": "2025-09-10 02:17:20.319792", "step": 652, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-10 02:17:20.357378", "step": 652, "epoch": 1 }, { "type": "loss", "content": 0.0178877804428339, "timestamp": "2025-09-10 02:17:20.370475", "step": 653, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:17:20.400728", "step": 653, "epoch": 1 }, { "type": "loss", "content": 0.02879807911813259, "timestamp": "2025-09-10 02:17:20.407758", "step": 654, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:17:20.439357", "step": 654, "epoch": 1 }, { "type": "loss", "content": 0.014536075294017792, "timestamp": "2025-09-10 02:17:20.449146", "step": 655, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:17:20.479745", "step": 655, "epoch": 1 }, { "type": "loss", "content": 0.005087182391434908, "timestamp": "2025-09-10 02:17:20.512393", "step": 656, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:17:20.542746", "step": 656, "epoch": 1 }, { "type": "loss", "content": 0.021268010139465332, "timestamp": "2025-09-10 02:17:20.552947", "step": 657, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:17:20.584319", "step": 657, "epoch": 1 }, { "type": "loss", "content": 0.03790181875228882, "timestamp": "2025-09-10 02:17:20.591566", "step": 658, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:17:20.623381", "step": 658, "epoch": 1 }, { "type": "loss", "content": 0.01340021938085556, "timestamp": "2025-09-10 02:17:20.630847", "step": 659, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:17:20.660721", "step": 659, "epoch": 1 }, { "type": "loss", "content": 0.04001796990633011, "timestamp": "2025-09-10 02:17:20.689358", "step": 660, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:17:20.720306", "step": 660, "epoch": 1 }, { "type": "loss", "content": 0.014666594564914703, "timestamp": "2025-09-10 02:17:20.724796", "step": 661, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:17:20.755743", "step": 661, "epoch": 1 }, { "type": "loss", "content": 0.015082466416060925, "timestamp": "2025-09-10 02:17:20.763048", "step": 662, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:17:20.793259", "step": 662, "epoch": 1 }, { "type": "loss", "content": 0.02892708219587803, "timestamp": "2025-09-10 02:17:20.800964", "step": 663, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:17:20.831799", "step": 663, "epoch": 1 }, { "type": "loss", "content": 0.015628747642040253, "timestamp": "2025-09-10 02:17:20.864736", "step": 664, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:17:20.896146", "step": 664, "epoch": 1 }, { "type": "loss", "content": 0.00712405052036047, "timestamp": "2025-09-10 02:17:20.900645", "step": 665, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:17:20.930963", "step": 665, "epoch": 1 }, { "type": "loss", "content": 0.026593917980790138, "timestamp": "2025-09-10 02:17:20.937907", "step": 666, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:17:20.968190", "step": 666, "epoch": 1 }, { "type": "loss", "content": 0.024430980905890465, "timestamp": "2025-09-10 02:17:20.979065", "step": 667, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:17:21.009583", "step": 667, "epoch": 1 }, { "type": "loss", "content": 0.00685026403516531, "timestamp": "2025-09-10 02:17:21.034329", "step": 668, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:17:21.064108", "step": 668, "epoch": 1 }, { "type": "loss", "content": 0.017591923475265503, "timestamp": "2025-09-10 02:17:21.068756", "step": 669, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:17:21.100801", "step": 669, "epoch": 1 }, { "type": "loss", "content": 0.037963759154081345, "timestamp": "2025-09-10 02:17:21.108507", "step": 670, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:17:21.139238", "step": 670, "epoch": 1 }, { "type": "loss", "content": 0.010039789602160454, "timestamp": "2025-09-10 02:17:21.146744", "step": 671, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:17:21.177482", "step": 671, "epoch": 1 }, { "type": "loss", "content": 0.009369760751724243, "timestamp": "2025-09-10 02:17:21.208546", "step": 672, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:17:21.239684", "step": 672, "epoch": 1 }, { "type": "loss", "content": 0.021921101957559586, "timestamp": "2025-09-10 02:17:21.244528", "step": 673, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 432 ], "flops": 12814563338304 }, "timestamp": "2025-09-10 02:17:21.283405", "step": 673, "epoch": 1 }, { "type": "loss", "content": 0.031845226883888245, "timestamp": "2025-09-10 02:17:21.299593", "step": 674, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:17:21.330395", "step": 674, "epoch": 1 }, { "type": "loss", "content": 0.018058914691209793, "timestamp": "2025-09-10 02:17:21.337228", "step": 675, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:17:21.367685", "step": 675, "epoch": 1 }, { "type": "loss", "content": 0.014186178334057331, "timestamp": "2025-09-10 02:17:21.392579", "step": 676, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:17:21.422596", "step": 676, "epoch": 1 }, { "type": "loss", "content": 0.009794117882847786, "timestamp": "2025-09-10 02:17:21.427139", "step": 677, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:17:21.461444", "step": 677, "epoch": 1 }, { "type": "loss", "content": 0.02925429679453373, "timestamp": "2025-09-10 02:17:21.468343", "step": 678, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:17:21.498941", "step": 678, "epoch": 1 }, { "type": "loss", "content": 0.006639067083597183, "timestamp": "2025-09-10 02:17:21.506197", "step": 679, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:17:21.539972", "step": 679, "epoch": 1 }, { "type": "loss", "content": 0.012162303552031517, "timestamp": "2025-09-10 02:17:21.574487", "step": 680, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:17:21.606403", "step": 680, "epoch": 1 }, { "type": "loss", "content": 0.01577383652329445, "timestamp": "2025-09-10 02:17:21.608542", "step": 681, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:17:21.638652", "step": 681, "epoch": 1 }, { "type": "loss", "content": 0.01001597661525011, "timestamp": "2025-09-10 02:17:21.645573", "step": 682, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:17:21.676435", "step": 682, "epoch": 1 }, { "type": "loss", "content": 0.027138683944940567, "timestamp": "2025-09-10 02:17:21.684229", "step": 683, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:17:21.714391", "step": 683, "epoch": 1 }, { "type": "loss", "content": 0.01829609088599682, "timestamp": "2025-09-10 02:17:21.742189", "step": 684, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-10 02:17:21.778582", "step": 684, "epoch": 1 }, { "type": "loss", "content": 0.020318562164902687, "timestamp": "2025-09-10 02:17:21.793784", "step": 685, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:17:21.824562", "step": 685, "epoch": 1 }, { "type": "loss", "content": 0.025962335988879204, "timestamp": "2025-09-10 02:17:21.832278", "step": 686, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:17:21.864754", "step": 686, "epoch": 1 }, { "type": "loss", "content": 0.022130966186523438, "timestamp": "2025-09-10 02:17:21.871795", "step": 687, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:17:21.908649", "step": 687, "epoch": 1 }, { "type": "loss", "content": 0.020423393696546555, "timestamp": "2025-09-10 02:17:21.936467", "step": 688, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:17:21.969398", "step": 688, "epoch": 1 }, { "type": "loss", "content": 0.025996601209044456, "timestamp": "2025-09-10 02:17:21.974116", "step": 689, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:17:22.008827", "step": 689, "epoch": 1 }, { "type": "loss", "content": 0.013769307173788548, "timestamp": "2025-09-10 02:17:22.021168", "step": 690, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:17:22.051835", "step": 690, "epoch": 1 }, { "type": "loss", "content": 0.01184395607560873, "timestamp": "2025-09-10 02:17:22.058527", "step": 691, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:17:22.089604", "step": 691, "epoch": 1 }, { "type": "loss", "content": 0.013138137757778168, "timestamp": "2025-09-10 02:17:22.117290", "step": 692, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:17:22.148020", "step": 692, "epoch": 1 }, { "type": "loss", "content": 0.02101938985288143, "timestamp": "2025-09-10 02:17:22.152625", "step": 693, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:17:22.182555", "step": 693, "epoch": 1 }, { "type": "loss", "content": 0.008636675775051117, "timestamp": "2025-09-10 02:17:22.190279", "step": 694, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:17:22.220932", "step": 694, "epoch": 1 }, { "type": "loss", "content": 0.014400548301637173, "timestamp": "2025-09-10 02:17:22.228369", "step": 695, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 432 ], "flops": 12814563338304 }, "timestamp": "2025-09-10 02:17:22.266988", "step": 695, "epoch": 1 }, { "type": "loss", "content": 0.02432694099843502, "timestamp": "2025-09-10 02:17:22.304035", "step": 696, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:17:22.334547", "step": 696, "epoch": 1 }, { "type": "loss", "content": 0.00980927050113678, "timestamp": "2025-09-10 02:17:22.342975", "step": 697, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:17:22.373782", "step": 697, "epoch": 1 }, { "type": "loss", "content": 0.011573218740522861, "timestamp": "2025-09-10 02:17:22.381161", "step": 698, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:17:22.411688", "step": 698, "epoch": 1 }, { "type": "loss", "content": 0.02407762221992016, "timestamp": "2025-09-10 02:17:22.419006", "step": 699, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:17:22.448918", "step": 699, "epoch": 1 }, { "type": "loss", "content": 0.02633582428097725, "timestamp": "2025-09-10 02:17:22.476479", "step": 700, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:17:22.506687", "step": 700, "epoch": 1 }, { "type": "loss", "content": 0.011304855346679688, "timestamp": "2025-09-10 02:17:22.517108", "step": 701, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:17:22.547634", "step": 701, "epoch": 1 }, { "type": "loss", "content": 0.019669000059366226, "timestamp": "2025-09-10 02:17:22.554496", "step": 702, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:17:22.585771", "step": 702, "epoch": 1 }, { "type": "loss", "content": 0.024419734254479408, "timestamp": "2025-09-10 02:17:22.596278", "step": 703, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:17:22.627396", "step": 703, "epoch": 1 }, { "type": "loss", "content": 0.023848844692111015, "timestamp": "2025-09-10 02:17:22.655573", "step": 704, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:17:22.686399", "step": 704, "epoch": 1 }, { "type": "loss", "content": 0.010360152460634708, "timestamp": "2025-09-10 02:17:22.696638", "step": 705, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:17:22.727958", "step": 705, "epoch": 1 }, { "type": "loss", "content": 0.011346792802214622, "timestamp": "2025-09-10 02:17:22.731910", "step": 706, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:17:22.762430", "step": 706, "epoch": 1 }, { "type": "loss", "content": 0.013497546315193176, "timestamp": "2025-09-10 02:17:22.774546", "step": 707, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:17:22.807696", "step": 707, "epoch": 1 }, { "type": "loss", "content": 0.015288002789020538, "timestamp": "2025-09-10 02:17:22.841930", "step": 708, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:17:22.872858", "step": 708, "epoch": 1 }, { "type": "loss", "content": 0.013141672126948833, "timestamp": "2025-09-10 02:17:22.877838", "step": 709, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:17:22.907856", "step": 709, "epoch": 1 }, { "type": "loss", "content": 0.020733583718538284, "timestamp": "2025-09-10 02:17:22.911893", "step": 710, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:17:22.944525", "step": 710, "epoch": 1 }, { "type": "loss", "content": 0.01969611644744873, "timestamp": "2025-09-10 02:17:22.957824", "step": 711, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 144 ], "flops": 4271696270016 }, "timestamp": "2025-09-10 02:17:22.989433", "step": 711, "epoch": 1 }, { "type": "loss", "content": 0.018604954704642296, "timestamp": "2025-09-10 02:17:23.012821", "step": 712, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:17:23.043016", "step": 712, "epoch": 1 }, { "type": "loss", "content": 0.018361497670412064, "timestamp": "2025-09-10 02:17:23.050764", "step": 713, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:17:23.081369", "step": 713, "epoch": 1 }, { "type": "loss", "content": 0.013966246508061886, "timestamp": "2025-09-10 02:17:23.088194", "step": 714, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:17:23.117132", "step": 714, "epoch": 1 }, { "type": "loss", "content": 0.010636130347847939, "timestamp": "2025-09-10 02:17:23.123967", "step": 715, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:17:23.154581", "step": 715, "epoch": 1 }, { "type": "loss", "content": 0.01876237615942955, "timestamp": "2025-09-10 02:17:23.185152", "step": 716, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:17:23.217490", "step": 716, "epoch": 1 }, { "type": "loss", "content": 0.008436868898570538, "timestamp": "2025-09-10 02:17:23.230458", "step": 717, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:17:23.260739", "step": 717, "epoch": 1 }, { "type": "loss", "content": 0.013500401750206947, "timestamp": "2025-09-10 02:17:23.271538", "step": 718, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-10 02:17:23.310070", "step": 718, "epoch": 1 }, { "type": "loss", "content": 0.01989280991256237, "timestamp": "2025-09-10 02:17:23.325658", "step": 719, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:17:23.357652", "step": 719, "epoch": 1 }, { "type": "loss", "content": 0.011787742376327515, "timestamp": "2025-09-10 02:17:23.389493", "step": 720, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:17:23.419435", "step": 720, "epoch": 1 }, { "type": "loss", "content": 0.007818952202796936, "timestamp": "2025-09-10 02:17:23.424154", "step": 721, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:17:23.455056", "step": 721, "epoch": 1 }, { "type": "loss", "content": 0.02153034135699272, "timestamp": "2025-09-10 02:17:23.466054", "step": 722, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:17:23.496351", "step": 722, "epoch": 1 }, { "type": "loss", "content": 0.01763448491692543, "timestamp": "2025-09-10 02:17:23.503135", "step": 723, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:17:23.533766", "step": 723, "epoch": 1 }, { "type": "loss", "content": 0.017756912857294083, "timestamp": "2025-09-10 02:17:23.561649", "step": 724, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:17:23.592162", "step": 724, "epoch": 1 }, { "type": "loss", "content": 0.024670034646987915, "timestamp": "2025-09-10 02:17:23.594202", "step": 725, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:17:23.623940", "step": 725, "epoch": 1 }, { "type": "loss", "content": 0.009984654374420643, "timestamp": "2025-09-10 02:17:23.628162", "step": 726, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:17:23.659122", "step": 726, "epoch": 1 }, { "type": "loss", "content": 0.021593144163489342, "timestamp": "2025-09-10 02:17:23.663205", "step": 727, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:17:23.693579", "step": 727, "epoch": 1 }, { "type": "loss", "content": 0.025460926815867424, "timestamp": "2025-09-10 02:17:23.719090", "step": 728, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-10 02:17:23.755523", "step": 728, "epoch": 1 }, { "type": "loss", "content": 0.006311932113021612, "timestamp": "2025-09-10 02:17:23.770962", "step": 729, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:17:23.801168", "step": 729, "epoch": 1 }, { "type": "loss", "content": 0.013910098932683468, "timestamp": "2025-09-10 02:17:23.807934", "step": 730, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:17:23.839242", "step": 730, "epoch": 1 }, { "type": "loss", "content": 0.00481247017160058, "timestamp": "2025-09-10 02:17:23.851799", "step": 731, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:17:23.883378", "step": 731, "epoch": 1 }, { "type": "loss", "content": 0.013029472902417183, "timestamp": "2025-09-10 02:17:23.911603", "step": 732, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:17:23.943677", "step": 732, "epoch": 1 }, { "type": "loss", "content": 0.004847945179790258, "timestamp": "2025-09-10 02:17:23.951290", "step": 733, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:17:23.982241", "step": 733, "epoch": 1 }, { "type": "loss", "content": 0.01138862781226635, "timestamp": "2025-09-10 02:17:23.986032", "step": 734, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:17:24.017503", "step": 734, "epoch": 1 }, { "type": "loss", "content": 0.02151215262711048, "timestamp": "2025-09-10 02:17:24.024494", "step": 735, "epoch": 1 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 736 ], "batch_size": 8, "flops": 14554433988352 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 480 ], "batch_size": 8, "flops": 9492022189824 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 544 ], "batch_size": 8, "flops": 10757625139456 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 624 ], "batch_size": 8, "flops": 12339628826496 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 576 ], "batch_size": 8, "flops": 11390426614272 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 432 ], "batch_size": 8, "flops": 8542819977600 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 432 ], "batch_size": 8, "flops": 8542819977600 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 128 ], "batch_size": 8, "flops": 2531205966848 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 544 ], "batch_size": 8, "flops": 10757625139456 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 656 ], "batch_size": 8, "flops": 12972430301312 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 128 ], "batch_size": 8, "flops": 2531205966848 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 464 ], "batch_size": 8, "flops": 9175621452416 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 560 ], "batch_size": 8, "flops": 11074025876864 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 576 ], "batch_size": 8, "flops": 11390426614272 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 1168 ], "batch_size": 8, "flops": 23097253898368 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 640 ], "batch_size": 8, "flops": 12656029563904 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 6, 208 ], "batch_size": 8, "flops": 4113209653888 } ], "timestamp": "2025-09-10 02:17:34.139298", "step": 735, "epoch": 1 }, { "type": "pplx", "content": 18458793.49173297, "timestamp": "2025-09-10 02:17:34.143714", "step": 735, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:17:34.175489", "step": 735, "epoch": 1 }, { "type": "loss", "content": 0.01929275132715702, "timestamp": "2025-09-10 02:17:34.209678", "step": 736, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-10 02:17:34.246127", "step": 736, "epoch": 1 }, { "type": "loss", "content": 0.013413517735898495, "timestamp": "2025-09-10 02:17:34.261241", "step": 737, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:17:34.292957", "step": 737, "epoch": 1 }, { "type": "loss", "content": 0.0064349048770964146, "timestamp": "2025-09-10 02:17:34.300048", "step": 738, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:17:34.330515", "step": 738, "epoch": 1 }, { "type": "loss", "content": 0.007773600518703461, "timestamp": "2025-09-10 02:17:34.338056", "step": 739, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 144 ], "flops": 4271696270016 }, "timestamp": "2025-09-10 02:17:34.369741", "step": 739, "epoch": 1 }, { "type": "loss", "content": 0.008945588953793049, "timestamp": "2025-09-10 02:17:34.393563", "step": 740, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:17:34.423783", "step": 740, "epoch": 1 }, { "type": "loss", "content": 0.024763548746705055, "timestamp": "2025-09-10 02:17:34.426047", "step": 741, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:17:34.456115", "step": 741, "epoch": 1 }, { "type": "loss", "content": 0.020738394930958748, "timestamp": "2025-09-10 02:17:34.463004", "step": 742, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:17:34.493870", "step": 742, "epoch": 1 }, { "type": "loss", "content": 0.0035269984509795904, "timestamp": "2025-09-10 02:17:34.497726", "step": 743, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:17:34.528175", "step": 743, "epoch": 1 }, { "type": "loss", "content": 0.030427515506744385, "timestamp": "2025-09-10 02:17:34.553115", "step": 744, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:17:34.583845", "step": 744, "epoch": 1 }, { "type": "loss", "content": 0.007679258938878775, "timestamp": "2025-09-10 02:17:34.586081", "step": 745, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:17:34.616894", "step": 745, "epoch": 1 }, { "type": "loss", "content": 0.028332481160759926, "timestamp": "2025-09-10 02:17:34.629300", "step": 746, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:17:34.659701", "step": 746, "epoch": 1 }, { "type": "loss", "content": 0.008473207242786884, "timestamp": "2025-09-10 02:17:34.666562", "step": 747, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:17:34.697133", "step": 747, "epoch": 1 }, { "type": "loss", "content": 0.017313247546553612, "timestamp": "2025-09-10 02:17:34.728079", "step": 748, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:17:34.758141", "step": 748, "epoch": 1 }, { "type": "loss", "content": 0.013101726770401001, "timestamp": "2025-09-10 02:17:34.762842", "step": 749, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:17:34.795984", "step": 749, "epoch": 1 }, { "type": "loss", "content": 0.0032856224570423365, "timestamp": "2025-09-10 02:17:34.809377", "step": 750, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:17:34.839594", "step": 750, "epoch": 1 }, { "type": "loss", "content": 0.011557753197848797, "timestamp": "2025-09-10 02:17:34.846505", "step": 751, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:17:34.877292", "step": 751, "epoch": 1 }, { "type": "loss", "content": 0.0019083227962255478, "timestamp": "2025-09-10 02:17:34.904773", "step": 752, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:17:34.936422", "step": 752, "epoch": 1 }, { "type": "loss", "content": 0.015568030066788197, "timestamp": "2025-09-10 02:17:34.943847", "step": 753, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:17:34.974977", "step": 753, "epoch": 1 }, { "type": "loss", "content": 0.004572854842990637, "timestamp": "2025-09-10 02:17:34.985500", "step": 754, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:17:35.015995", "step": 754, "epoch": 1 }, { "type": "loss", "content": 0.000890351424459368, "timestamp": "2025-09-10 02:17:35.023548", "step": 755, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:17:35.053850", "step": 755, "epoch": 1 }, { "type": "loss", "content": 0.01434353832155466, "timestamp": "2025-09-10 02:17:35.079127", "step": 756, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:17:35.110114", "step": 756, "epoch": 1 }, { "type": "loss", "content": 0.016249870881438255, "timestamp": "2025-09-10 02:17:35.114852", "step": 757, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 480 ], "flops": 14238374516352 }, "timestamp": "2025-09-10 02:17:35.156515", "step": 757, "epoch": 1 }, { "type": "loss", "content": 0.038629692047834396, "timestamp": "2025-09-10 02:17:35.173760", "step": 758, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:17:35.205032", "step": 758, "epoch": 1 }, { "type": "loss", "content": 0.05195966735482216, "timestamp": "2025-09-10 02:17:35.217498", "step": 759, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:17:35.247998", "step": 759, "epoch": 1 }, { "type": "loss", "content": 0.03985142335295677, "timestamp": "2025-09-10 02:17:35.275939", "step": 760, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:17:35.306453", "step": 760, "epoch": 1 }, { "type": "loss", "content": 0.012833379209041595, "timestamp": "2025-09-10 02:17:35.310647", "step": 761, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:17:35.341151", "step": 761, "epoch": 1 }, { "type": "loss", "content": 0.018250539898872375, "timestamp": "2025-09-10 02:17:35.353575", "step": 762, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-10 02:17:35.387132", "step": 762, "epoch": 1 }, { "type": "loss", "content": 0.006811958272010088, "timestamp": "2025-09-10 02:17:35.401072", "step": 763, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:17:35.431899", "step": 763, "epoch": 1 }, { "type": "loss", "content": 0.01085501629859209, "timestamp": "2025-09-10 02:17:35.459535", "step": 764, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:17:35.489598", "step": 764, "epoch": 1 }, { "type": "loss", "content": 0.034105248749256134, "timestamp": "2025-09-10 02:17:35.494325", "step": 765, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:17:35.524429", "step": 765, "epoch": 1 }, { "type": "loss", "content": 0.007085829973220825, "timestamp": "2025-09-10 02:17:35.531214", "step": 766, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-10 02:17:35.569083", "step": 766, "epoch": 1 }, { "type": "loss", "content": 0.0038453133311122656, "timestamp": "2025-09-10 02:17:35.584662", "step": 767, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:17:35.616279", "step": 767, "epoch": 1 }, { "type": "loss", "content": 0.009612992405891418, "timestamp": "2025-09-10 02:17:35.644013", "step": 768, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:17:35.675253", "step": 768, "epoch": 1 }, { "type": "loss", "content": 0.012257935479283333, "timestamp": "2025-09-10 02:17:35.677227", "step": 769, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:17:35.708722", "step": 769, "epoch": 1 }, { "type": "loss", "content": 0.01698746345937252, "timestamp": "2025-09-10 02:17:35.720306", "step": 770, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:17:35.751747", "step": 770, "epoch": 1 }, { "type": "loss", "content": 0.01926126517355442, "timestamp": "2025-09-10 02:17:35.758927", "step": 771, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:17:35.791395", "step": 771, "epoch": 1 }, { "type": "loss", "content": 0.01315612904727459, "timestamp": "2025-09-10 02:17:35.815732", "step": 772, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:17:35.847862", "step": 772, "epoch": 1 }, { "type": "loss", "content": 0.017368396744132042, "timestamp": "2025-09-10 02:17:35.856892", "step": 773, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:17:35.892080", "step": 773, "epoch": 1 }, { "type": "loss", "content": 0.003305921098217368, "timestamp": "2025-09-10 02:17:35.899155", "step": 774, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:17:35.935778", "step": 774, "epoch": 1 }, { "type": "loss", "content": 0.006261749658733606, "timestamp": "2025-09-10 02:17:35.942911", "step": 775, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:17:35.989877", "step": 775, "epoch": 1 }, { "type": "loss", "content": 0.025168852880597115, "timestamp": "2025-09-10 02:17:36.015181", "step": 776, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:17:36.052838", "step": 776, "epoch": 1 }, { "type": "loss", "content": 0.014582036063075066, "timestamp": "2025-09-10 02:17:36.057215", "step": 777, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:17:36.095077", "step": 777, "epoch": 1 }, { "type": "loss", "content": 0.015908481553196907, "timestamp": "2025-09-10 02:17:36.105369", "step": 778, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:17:36.136298", "step": 778, "epoch": 1 }, { "type": "loss", "content": 0.004383227322250605, "timestamp": "2025-09-10 02:17:36.142909", "step": 779, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:17:36.173944", "step": 779, "epoch": 1 }, { "type": "loss", "content": 0.012968703173100948, "timestamp": "2025-09-10 02:17:36.201145", "step": 780, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:17:36.231798", "step": 780, "epoch": 1 }, { "type": "loss", "content": 0.015527973882853985, "timestamp": "2025-09-10 02:17:36.236691", "step": 781, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:17:36.271993", "step": 781, "epoch": 1 }, { "type": "loss", "content": 0.015053192153573036, "timestamp": "2025-09-10 02:17:36.285682", "step": 782, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:17:36.317256", "step": 782, "epoch": 1 }, { "type": "loss", "content": 0.012528965249657631, "timestamp": "2025-09-10 02:17:36.324049", "step": 783, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-10 02:17:36.362027", "step": 783, "epoch": 1 }, { "type": "loss", "content": 0.002056631725281477, "timestamp": "2025-09-10 02:17:36.398818", "step": 784, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:17:36.430314", "step": 784, "epoch": 1 }, { "type": "loss", "content": 0.004613164346665144, "timestamp": "2025-09-10 02:17:36.439249", "step": 785, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-10 02:17:36.477062", "step": 785, "epoch": 1 }, { "type": "loss", "content": 0.04229161515831947, "timestamp": "2025-09-10 02:17:36.492661", "step": 786, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:17:36.523478", "step": 786, "epoch": 1 }, { "type": "loss", "content": 0.03430848568677902, "timestamp": "2025-09-10 02:17:36.530252", "step": 787, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-10 02:17:36.563970", "step": 787, "epoch": 1 }, { "type": "loss", "content": 0.006993894465267658, "timestamp": "2025-09-10 02:17:36.598606", "step": 788, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:17:36.632384", "step": 788, "epoch": 1 }, { "type": "loss", "content": 0.004820824600756168, "timestamp": "2025-09-10 02:17:36.636484", "step": 789, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:17:36.667555", "step": 789, "epoch": 1 }, { "type": "loss", "content": 0.0256601981818676, "timestamp": "2025-09-10 02:17:36.674098", "step": 790, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 528 ], "flops": 15662185694400 }, "timestamp": "2025-09-10 02:17:36.721295", "step": 790, "epoch": 1 }, { "type": "loss", "content": 0.013889133930206299, "timestamp": "2025-09-10 02:17:36.740383", "step": 791, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:17:36.771718", "step": 791, "epoch": 1 }, { "type": "loss", "content": 0.016334451735019684, "timestamp": "2025-09-10 02:17:36.799678", "step": 792, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:17:36.830397", "step": 792, "epoch": 1 }, { "type": "loss", "content": 0.013368922285735607, "timestamp": "2025-09-10 02:17:36.834885", "step": 793, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:17:36.867576", "step": 793, "epoch": 1 }, { "type": "loss", "content": 0.031976792961359024, "timestamp": "2025-09-10 02:17:36.874661", "step": 794, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:17:36.911198", "step": 794, "epoch": 1 }, { "type": "loss", "content": 0.0013559797080233693, "timestamp": "2025-09-10 02:17:36.918054", "step": 795, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:17:36.957012", "step": 795, "epoch": 1 }, { "type": "loss", "content": 0.04322435334324837, "timestamp": "2025-09-10 02:17:36.991235", "step": 796, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:17:37.031780", "step": 796, "epoch": 1 }, { "type": "loss", "content": 0.0115485405549407, "timestamp": "2025-09-10 02:17:37.044793", "step": 797, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:17:37.083728", "step": 797, "epoch": 1 }, { "type": "loss", "content": 0.003736104816198349, "timestamp": "2025-09-10 02:17:37.090801", "step": 798, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:17:37.124580", "step": 798, "epoch": 1 }, { "type": "loss", "content": 0.014022842049598694, "timestamp": "2025-09-10 02:17:37.128190", "step": 799, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:17:37.159139", "step": 799, "epoch": 1 }, { "type": "loss", "content": 0.013005274347960949, "timestamp": "2025-09-10 02:17:37.182616", "step": 800, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 144 ], "flops": 4271696270016 }, "timestamp": "2025-09-10 02:17:37.213215", "step": 800, "epoch": 1 }, { "type": "loss", "content": 0.02133549191057682, "timestamp": "2025-09-10 02:17:37.215490", "step": 801, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:17:37.247157", "step": 801, "epoch": 1 }, { "type": "loss", "content": 0.0029299429152160883, "timestamp": "2025-09-10 02:17:37.259354", "step": 802, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:17:37.295020", "step": 802, "epoch": 1 }, { "type": "loss", "content": 0.0014492860063910484, "timestamp": "2025-09-10 02:17:37.297558", "step": 803, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:17:37.333756", "step": 803, "epoch": 1 }, { "type": "loss", "content": 0.021839609369635582, "timestamp": "2025-09-10 02:17:37.368272", "step": 804, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:17:37.400485", "step": 804, "epoch": 1 }, { "type": "loss", "content": 0.030333133414387703, "timestamp": "2025-09-10 02:17:37.404770", "step": 805, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:17:37.437794", "step": 805, "epoch": 1 }, { "type": "loss", "content": 0.006924864836037159, "timestamp": "2025-09-10 02:17:37.448090", "step": 806, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:17:37.479748", "step": 806, "epoch": 1 }, { "type": "loss", "content": 0.0072951540350914, "timestamp": "2025-09-10 02:17:37.486157", "step": 807, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:17:37.517782", "step": 807, "epoch": 1 }, { "type": "loss", "content": 0.006309094373136759, "timestamp": "2025-09-10 02:17:37.545367", "step": 808, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:17:37.580837", "step": 808, "epoch": 1 }, { "type": "loss", "content": 0.00040522878407500684, "timestamp": "2025-09-10 02:17:37.585120", "step": 809, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:17:37.617472", "step": 809, "epoch": 1 }, { "type": "loss", "content": 0.006362107116729021, "timestamp": "2025-09-10 02:17:37.619848", "step": 810, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:17:37.651402", "step": 810, "epoch": 1 }, { "type": "loss", "content": 0.008096226491034031, "timestamp": "2025-09-10 02:17:37.658397", "step": 811, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:17:37.691987", "step": 811, "epoch": 1 }, { "type": "loss", "content": 0.007765918970108032, "timestamp": "2025-09-10 02:17:37.726560", "step": 812, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:17:37.758589", "step": 812, "epoch": 1 }, { "type": "loss", "content": 0.019435886293649673, "timestamp": "2025-09-10 02:17:37.762951", "step": 813, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:17:37.794126", "step": 813, "epoch": 1 }, { "type": "loss", "content": 0.009066428057849407, "timestamp": "2025-09-10 02:17:37.797550", "step": 814, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:17:37.828748", "step": 814, "epoch": 1 }, { "type": "loss", "content": 0.0014025976415723562, "timestamp": "2025-09-10 02:17:37.838601", "step": 815, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:17:37.870535", "step": 815, "epoch": 1 }, { "type": "loss", "content": 0.023165103048086166, "timestamp": "2025-09-10 02:17:37.900977", "step": 816, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:17:37.933424", "step": 816, "epoch": 1 }, { "type": "loss", "content": 0.012905867770314217, "timestamp": "2025-09-10 02:17:37.946463", "step": 817, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:17:37.979295", "step": 817, "epoch": 1 }, { "type": "loss", "content": 0.023732444271445274, "timestamp": "2025-09-10 02:17:37.992647", "step": 818, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:17:38.024568", "step": 818, "epoch": 1 }, { "type": "loss", "content": 0.03576406463980675, "timestamp": "2025-09-10 02:17:38.033872", "step": 819, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:17:38.064401", "step": 819, "epoch": 1 }, { "type": "loss", "content": 0.039100583642721176, "timestamp": "2025-09-10 02:17:38.089401", "step": 820, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:17:38.120591", "step": 820, "epoch": 1 }, { "type": "loss", "content": 0.019176315516233444, "timestamp": "2025-09-10 02:17:38.124732", "step": 821, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:17:38.156050", "step": 821, "epoch": 1 }, { "type": "loss", "content": 0.06485612690448761, "timestamp": "2025-09-10 02:17:38.163315", "step": 822, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 128 ], "flops": 3797092544000 }, "timestamp": "2025-09-10 02:17:38.193669", "step": 822, "epoch": 1 }, { "type": "loss", "content": 0.0012710961746051908, "timestamp": "2025-09-10 02:17:38.197109", "step": 823, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:17:38.227795", "step": 823, "epoch": 1 }, { "type": "loss", "content": 0.002674340968951583, "timestamp": "2025-09-10 02:17:38.252966", "step": 824, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:17:38.284796", "step": 824, "epoch": 1 }, { "type": "loss", "content": 0.005030173342674971, "timestamp": "2025-09-10 02:17:38.291925", "step": 825, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:17:38.324273", "step": 825, "epoch": 1 }, { "type": "loss", "content": 0.033865850418806076, "timestamp": "2025-09-10 02:17:38.327924", "step": 826, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:17:38.359190", "step": 826, "epoch": 1 }, { "type": "loss", "content": 0.051615625619888306, "timestamp": "2025-09-10 02:17:38.363253", "step": 827, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:17:38.393455", "step": 827, "epoch": 1 }, { "type": "loss", "content": 0.005924302618950605, "timestamp": "2025-09-10 02:17:38.421082", "step": 828, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:17:38.452362", "step": 828, "epoch": 1 }, { "type": "loss", "content": 0.009351923130452633, "timestamp": "2025-09-10 02:17:38.461365", "step": 829, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:17:38.492734", "step": 829, "epoch": 1 }, { "type": "loss", "content": 0.014958539046347141, "timestamp": "2025-09-10 02:17:38.500339", "step": 830, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:17:38.532470", "step": 830, "epoch": 1 }, { "type": "loss", "content": 0.009349385276436806, "timestamp": "2025-09-10 02:17:38.538976", "step": 831, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:17:38.570077", "step": 831, "epoch": 1 }, { "type": "loss", "content": 0.026025842875242233, "timestamp": "2025-09-10 02:17:38.603218", "step": 832, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:17:38.635495", "step": 832, "epoch": 1 }, { "type": "loss", "content": 0.018966345116496086, "timestamp": "2025-09-10 02:17:38.642712", "step": 833, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:17:38.674175", "step": 833, "epoch": 1 }, { "type": "loss", "content": 0.04127897694706917, "timestamp": "2025-09-10 02:17:38.680832", "step": 834, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:17:38.712426", "step": 834, "epoch": 1 }, { "type": "loss", "content": 0.04661082848906517, "timestamp": "2025-09-10 02:17:38.719223", "step": 835, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:17:38.751110", "step": 835, "epoch": 1 }, { "type": "loss", "content": 0.022765228524804115, "timestamp": "2025-09-10 02:17:38.778922", "step": 836, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:17:38.809955", "step": 836, "epoch": 1 }, { "type": "loss", "content": 0.01644345186650753, "timestamp": "2025-09-10 02:17:38.818849", "step": 837, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-10 02:17:38.857840", "step": 837, "epoch": 1 }, { "type": "loss", "content": 0.046977002173662186, "timestamp": "2025-09-10 02:17:38.873689", "step": 838, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:17:38.905257", "step": 838, "epoch": 1 }, { "type": "loss", "content": 0.02852710708975792, "timestamp": "2025-09-10 02:17:38.909440", "step": 839, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:17:38.939934", "step": 839, "epoch": 1 }, { "type": "loss", "content": 0.04006481543183327, "timestamp": "2025-09-10 02:17:38.967495", "step": 840, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 480 ], "flops": 14238374516352 }, "timestamp": "2025-09-10 02:17:39.007106", "step": 840, "epoch": 1 }, { "type": "loss", "content": 0.026953106746077538, "timestamp": "2025-09-10 02:17:39.023998", "step": 841, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:17:39.054804", "step": 841, "epoch": 1 }, { "type": "loss", "content": 0.010299399495124817, "timestamp": "2025-09-10 02:17:39.061572", "step": 842, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:17:39.091752", "step": 842, "epoch": 1 }, { "type": "loss", "content": 0.018122700974345207, "timestamp": "2025-09-10 02:17:39.094076", "step": 843, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:17:39.125506", "step": 843, "epoch": 1 }, { "type": "loss", "content": 0.015159577131271362, "timestamp": "2025-09-10 02:17:39.153533", "step": 844, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 432 ], "flops": 12814563338304 }, "timestamp": "2025-09-10 02:17:39.190048", "step": 844, "epoch": 1 }, { "type": "loss", "content": 0.017123881727457047, "timestamp": "2025-09-10 02:17:39.205650", "step": 845, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:17:39.240889", "step": 845, "epoch": 1 }, { "type": "loss", "content": 0.005689023993909359, "timestamp": "2025-09-10 02:17:39.254559", "step": 846, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-10 02:17:39.288982", "step": 846, "epoch": 1 }, { "type": "loss", "content": 0.016331713646650314, "timestamp": "2025-09-10 02:17:39.302923", "step": 847, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:17:39.335924", "step": 847, "epoch": 1 }, { "type": "loss", "content": 0.022277653217315674, "timestamp": "2025-09-10 02:17:39.360399", "step": 848, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:17:39.392440", "step": 848, "epoch": 1 }, { "type": "loss", "content": 0.027887245640158653, "timestamp": "2025-09-10 02:17:39.394610", "step": 849, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:17:39.425130", "step": 849, "epoch": 1 }, { "type": "loss", "content": 0.006013993173837662, "timestamp": "2025-09-10 02:17:39.435458", "step": 850, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:17:39.466820", "step": 850, "epoch": 1 }, { "type": "loss", "content": 0.00961573701351881, "timestamp": "2025-09-10 02:17:39.473322", "step": 851, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:17:39.504528", "step": 851, "epoch": 1 }, { "type": "loss", "content": 0.028055304661393166, "timestamp": "2025-09-10 02:17:39.534905", "step": 852, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 480 ], "flops": 14238374516352 }, "timestamp": "2025-09-10 02:17:39.574164", "step": 852, "epoch": 1 }, { "type": "loss", "content": 0.010051725432276726, "timestamp": "2025-09-10 02:17:39.591067", "step": 853, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-10 02:17:39.630072", "step": 853, "epoch": 1 }, { "type": "loss", "content": 0.008215261623263359, "timestamp": "2025-09-10 02:17:39.645899", "step": 854, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:17:39.677467", "step": 854, "epoch": 1 }, { "type": "loss", "content": 0.014234711416065693, "timestamp": "2025-09-10 02:17:39.683693", "step": 855, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:17:39.715222", "step": 855, "epoch": 1 }, { "type": "loss", "content": 0.011703640222549438, "timestamp": "2025-09-10 02:17:39.745554", "step": 856, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:17:39.777271", "step": 856, "epoch": 1 }, { "type": "loss", "content": 0.010386110283434391, "timestamp": "2025-09-10 02:17:39.786202", "step": 857, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:17:39.818376", "step": 857, "epoch": 1 }, { "type": "loss", "content": 0.026391830295324326, "timestamp": "2025-09-10 02:17:39.825003", "step": 858, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:17:39.856397", "step": 858, "epoch": 1 }, { "type": "loss", "content": 0.02559771202504635, "timestamp": "2025-09-10 02:17:39.862969", "step": 859, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-10 02:17:39.899353", "step": 859, "epoch": 1 }, { "type": "loss", "content": 0.019128460437059402, "timestamp": "2025-09-10 02:17:39.934178", "step": 860, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:17:39.966353", "step": 860, "epoch": 1 }, { "type": "loss", "content": 0.00833536684513092, "timestamp": "2025-09-10 02:17:39.978583", "step": 861, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:17:40.009753", "step": 861, "epoch": 1 }, { "type": "loss", "content": 0.012589543126523495, "timestamp": "2025-09-10 02:17:40.016543", "step": 862, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:17:40.049684", "step": 862, "epoch": 1 }, { "type": "loss", "content": 0.006604044698178768, "timestamp": "2025-09-10 02:17:40.053898", "step": 863, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:17:40.086709", "step": 863, "epoch": 1 }, { "type": "loss", "content": 0.02710030786693096, "timestamp": "2025-09-10 02:17:40.114207", "step": 864, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:17:40.145830", "step": 864, "epoch": 1 }, { "type": "loss", "content": 0.006634250283241272, "timestamp": "2025-09-10 02:17:40.153831", "step": 865, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:17:40.184219", "step": 865, "epoch": 1 }, { "type": "loss", "content": 0.03137756139039993, "timestamp": "2025-09-10 02:17:40.190972", "step": 866, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:17:40.222239", "step": 866, "epoch": 1 }, { "type": "loss", "content": 0.02019382454454899, "timestamp": "2025-09-10 02:17:40.229805", "step": 867, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:17:40.260687", "step": 867, "epoch": 1 }, { "type": "loss", "content": 0.00791159924119711, "timestamp": "2025-09-10 02:17:40.288245", "step": 868, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:17:40.318958", "step": 868, "epoch": 1 }, { "type": "loss", "content": 0.005334521643817425, "timestamp": "2025-09-10 02:17:40.323049", "step": 869, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:17:40.354600", "step": 869, "epoch": 1 }, { "type": "loss", "content": 0.009703114628791809, "timestamp": "2025-09-10 02:17:40.358892", "step": 870, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:17:40.390479", "step": 870, "epoch": 1 }, { "type": "loss", "content": 0.029153967276215553, "timestamp": "2025-09-10 02:17:40.397821", "step": 871, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:17:40.428409", "step": 871, "epoch": 1 }, { "type": "loss", "content": 0.007150101009756327, "timestamp": "2025-09-10 02:17:40.451663", "step": 872, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:17:40.482550", "step": 872, "epoch": 1 }, { "type": "loss", "content": 0.008403966203331947, "timestamp": "2025-09-10 02:17:40.486683", "step": 873, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:17:40.519587", "step": 873, "epoch": 1 }, { "type": "loss", "content": 0.029630528762936592, "timestamp": "2025-09-10 02:17:40.532895", "step": 874, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:17:40.564255", "step": 874, "epoch": 1 }, { "type": "loss", "content": 0.011236722581088543, "timestamp": "2025-09-10 02:17:40.571169", "step": 875, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:17:40.602266", "step": 875, "epoch": 1 }, { "type": "loss", "content": 0.01793195679783821, "timestamp": "2025-09-10 02:17:40.630123", "step": 876, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:17:40.660927", "step": 876, "epoch": 1 }, { "type": "loss", "content": 0.004515457898378372, "timestamp": "2025-09-10 02:17:40.663538", "step": 877, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:17:40.694461", "step": 877, "epoch": 1 }, { "type": "loss", "content": 0.01135172974318266, "timestamp": "2025-09-10 02:17:40.698873", "step": 878, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:17:40.729815", "step": 878, "epoch": 1 }, { "type": "loss", "content": 0.00693327933549881, "timestamp": "2025-09-10 02:17:40.739606", "step": 879, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:17:40.771350", "step": 879, "epoch": 1 }, { "type": "loss", "content": 0.017033789306879044, "timestamp": "2025-09-10 02:17:40.799018", "step": 880, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:17:40.829888", "step": 880, "epoch": 1 }, { "type": "loss", "content": 0.012997663579881191, "timestamp": "2025-09-10 02:17:40.839821", "step": 881, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:17:40.870538", "step": 881, "epoch": 1 }, { "type": "loss", "content": 0.009415126405656338, "timestamp": "2025-09-10 02:17:40.881247", "step": 882, "epoch": 1 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 736 ], "batch_size": 8, "flops": 14554433988352 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 480 ], "batch_size": 8, "flops": 9492022189824 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 544 ], "batch_size": 8, "flops": 10757625139456 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 624 ], "batch_size": 8, "flops": 12339628826496 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 576 ], "batch_size": 8, "flops": 11390426614272 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 432 ], "batch_size": 8, "flops": 8542819977600 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 432 ], "batch_size": 8, "flops": 8542819977600 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 128 ], "batch_size": 8, "flops": 2531205966848 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 544 ], "batch_size": 8, "flops": 10757625139456 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 656 ], "batch_size": 8, "flops": 12972430301312 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 128 ], "batch_size": 8, "flops": 2531205966848 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 464 ], "batch_size": 8, "flops": 9175621452416 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 560 ], "batch_size": 8, "flops": 11074025876864 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 576 ], "batch_size": 8, "flops": 11390426614272 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 1168 ], "batch_size": 8, "flops": 23097253898368 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 640 ], "batch_size": 8, "flops": 12656029563904 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 6, 208 ], "batch_size": 8, "flops": 4113209653888 } ], "timestamp": "2025-09-10 02:17:51.050797", "step": 882, "epoch": 1 }, { "type": "pplx", "content": 14730864.383457733, "timestamp": "2025-09-10 02:17:51.055567", "step": 882, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:17:51.091661", "step": 882, "epoch": 1 }, { "type": "loss", "content": 0.022233616560697556, "timestamp": "2025-09-10 02:17:51.099797", "step": 883, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:17:51.135781", "step": 883, "epoch": 1 }, { "type": "loss", "content": 0.009659935720264912, "timestamp": "2025-09-10 02:17:51.163204", "step": 884, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:17:51.201257", "step": 884, "epoch": 1 }, { "type": "loss", "content": 0.030786585062742233, "timestamp": "2025-09-10 02:17:51.207893", "step": 885, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:17:51.238953", "step": 885, "epoch": 1 }, { "type": "loss", "content": 0.021192189306020737, "timestamp": "2025-09-10 02:17:51.246685", "step": 886, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 784 ], "flops": 23255845310656 }, "timestamp": "2025-09-10 02:17:51.318129", "step": 886, "epoch": 1 }, { "type": "loss", "content": 0.04029746726155281, "timestamp": "2025-09-10 02:17:51.345232", "step": 887, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:17:51.376862", "step": 887, "epoch": 1 }, { "type": "loss", "content": 0.006447978317737579, "timestamp": "2025-09-10 02:17:51.410235", "step": 888, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:17:51.441644", "step": 888, "epoch": 1 }, { "type": "loss", "content": 0.02322995476424694, "timestamp": "2025-09-10 02:17:51.443876", "step": 889, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:17:51.474542", "step": 889, "epoch": 1 }, { "type": "loss", "content": 0.010987287387251854, "timestamp": "2025-09-10 02:17:51.485276", "step": 890, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:17:51.516512", "step": 890, "epoch": 1 }, { "type": "loss", "content": 0.02331923507153988, "timestamp": "2025-09-10 02:17:51.526594", "step": 891, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 432 ], "flops": 12814563338304 }, "timestamp": "2025-09-10 02:17:51.565807", "step": 891, "epoch": 1 }, { "type": "loss", "content": 0.003565514227375388, "timestamp": "2025-09-10 02:17:51.602876", "step": 892, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:17:51.633770", "step": 892, "epoch": 1 }, { "type": "loss", "content": 0.04227833077311516, "timestamp": "2025-09-10 02:17:51.636076", "step": 893, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:17:51.667184", "step": 893, "epoch": 1 }, { "type": "loss", "content": 0.021429577842354774, "timestamp": "2025-09-10 02:17:51.674087", "step": 894, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:17:51.705630", "step": 894, "epoch": 1 }, { "type": "loss", "content": 0.03835199028253555, "timestamp": "2025-09-10 02:17:51.713251", "step": 895, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:17:51.743135", "step": 895, "epoch": 1 }, { "type": "loss", "content": 0.0153651786968112, "timestamp": "2025-09-10 02:17:51.768400", "step": 896, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:17:51.801241", "step": 896, "epoch": 1 }, { "type": "loss", "content": 0.0387214757502079, "timestamp": "2025-09-10 02:17:51.803342", "step": 897, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:17:51.832852", "step": 897, "epoch": 1 }, { "type": "loss", "content": 0.0020098849199712276, "timestamp": "2025-09-10 02:17:51.837127", "step": 898, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:17:51.867212", "step": 898, "epoch": 1 }, { "type": "loss", "content": 0.009437446482479572, "timestamp": "2025-09-10 02:17:51.875034", "step": 899, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:17:51.906254", "step": 899, "epoch": 1 }, { "type": "loss", "content": 0.025736164301633835, "timestamp": "2025-09-10 02:17:51.934806", "step": 900, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:17:51.966020", "step": 900, "epoch": 1 }, { "type": "loss", "content": 0.001165196648798883, "timestamp": "2025-09-10 02:17:51.968310", "step": 901, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:17:52.000301", "step": 901, "epoch": 1 }, { "type": "loss", "content": 0.019891690462827682, "timestamp": "2025-09-10 02:17:52.008143", "step": 902, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:17:52.039005", "step": 902, "epoch": 1 }, { "type": "loss", "content": 0.03028137981891632, "timestamp": "2025-09-10 02:17:52.045936", "step": 903, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-10 02:17:52.084242", "step": 903, "epoch": 1 }, { "type": "loss", "content": 0.0014906581491231918, "timestamp": "2025-09-10 02:17:52.120703", "step": 904, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:17:52.151409", "step": 904, "epoch": 1 }, { "type": "loss", "content": 0.009351144544780254, "timestamp": "2025-09-10 02:17:52.159191", "step": 905, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:17:52.189988", "step": 905, "epoch": 1 }, { "type": "loss", "content": 0.032125215977430344, "timestamp": "2025-09-10 02:17:52.193955", "step": 906, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 496 ], "flops": 14712978242368 }, "timestamp": "2025-09-10 02:17:52.237212", "step": 906, "epoch": 1 }, { "type": "loss", "content": 0.02834523655474186, "timestamp": "2025-09-10 02:17:52.254904", "step": 907, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:17:52.290019", "step": 907, "epoch": 1 }, { "type": "loss", "content": 0.011470122262835503, "timestamp": "2025-09-10 02:17:52.324553", "step": 908, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:17:52.357249", "step": 908, "epoch": 1 }, { "type": "loss", "content": 0.03763606771826744, "timestamp": "2025-09-10 02:17:52.359462", "step": 909, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:17:52.390369", "step": 909, "epoch": 1 }, { "type": "loss", "content": 0.0020886852871626616, "timestamp": "2025-09-10 02:17:52.397115", "step": 910, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:17:52.428896", "step": 910, "epoch": 1 }, { "type": "loss", "content": 0.00491158664226532, "timestamp": "2025-09-10 02:17:52.436093", "step": 911, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:17:52.466335", "step": 911, "epoch": 1 }, { "type": "loss", "content": 0.010647162795066833, "timestamp": "2025-09-10 02:17:52.494032", "step": 912, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:17:52.525074", "step": 912, "epoch": 1 }, { "type": "loss", "content": 0.02704322710633278, "timestamp": "2025-09-10 02:17:52.527308", "step": 913, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:17:52.558028", "step": 913, "epoch": 1 }, { "type": "loss", "content": 0.03703900799155235, "timestamp": "2025-09-10 02:17:52.565527", "step": 914, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:17:52.595613", "step": 914, "epoch": 1 }, { "type": "loss", "content": 0.05229032784700394, "timestamp": "2025-09-10 02:17:52.603099", "step": 915, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:17:52.632844", "step": 915, "epoch": 1 }, { "type": "loss", "content": 0.02464185282588005, "timestamp": "2025-09-10 02:17:52.657740", "step": 916, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:17:52.687050", "step": 916, "epoch": 1 }, { "type": "loss", "content": 0.029760537669062614, "timestamp": "2025-09-10 02:17:52.688949", "step": 917, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:17:52.719815", "step": 917, "epoch": 1 }, { "type": "loss", "content": 0.023682432249188423, "timestamp": "2025-09-10 02:17:52.727378", "step": 918, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:17:52.758497", "step": 918, "epoch": 1 }, { "type": "loss", "content": 0.0028019100427627563, "timestamp": "2025-09-10 02:17:52.766244", "step": 919, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:17:52.796453", "step": 919, "epoch": 1 }, { "type": "loss", "content": 0.013780993409454823, "timestamp": "2025-09-10 02:17:52.824088", "step": 920, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:17:52.854433", "step": 920, "epoch": 1 }, { "type": "loss", "content": 0.003197154263034463, "timestamp": "2025-09-10 02:17:52.858986", "step": 921, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:17:52.890585", "step": 921, "epoch": 1 }, { "type": "loss", "content": 0.002631398383527994, "timestamp": "2025-09-10 02:17:52.903136", "step": 922, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:17:52.934809", "step": 922, "epoch": 1 }, { "type": "loss", "content": 0.01756918616592884, "timestamp": "2025-09-10 02:17:52.945697", "step": 923, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:17:52.976339", "step": 923, "epoch": 1 }, { "type": "loss", "content": 0.022316312417387962, "timestamp": "2025-09-10 02:17:53.001800", "step": 924, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:17:53.033155", "step": 924, "epoch": 1 }, { "type": "loss", "content": 0.018341967836022377, "timestamp": "2025-09-10 02:17:53.040820", "step": 925, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-10 02:17:53.075483", "step": 925, "epoch": 1 }, { "type": "loss", "content": 0.007413599174469709, "timestamp": "2025-09-10 02:17:53.089315", "step": 926, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:17:53.119379", "step": 926, "epoch": 1 }, { "type": "loss", "content": 0.02032196894288063, "timestamp": "2025-09-10 02:17:53.126669", "step": 927, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:17:53.157084", "step": 927, "epoch": 1 }, { "type": "loss", "content": 0.007340454496443272, "timestamp": "2025-09-10 02:17:53.185787", "step": 928, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:17:53.218495", "step": 928, "epoch": 1 }, { "type": "loss", "content": 0.017748655751347542, "timestamp": "2025-09-10 02:17:53.226305", "step": 929, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:17:53.256717", "step": 929, "epoch": 1 }, { "type": "loss", "content": 0.042677875608205795, "timestamp": "2025-09-10 02:17:53.260877", "step": 930, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:17:53.291560", "step": 930, "epoch": 1 }, { "type": "loss", "content": 0.009248084388673306, "timestamp": "2025-09-10 02:17:53.299141", "step": 931, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:17:53.330419", "step": 931, "epoch": 1 }, { "type": "loss", "content": 0.015127205289900303, "timestamp": "2025-09-10 02:17:53.361965", "step": 932, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:17:53.392356", "step": 932, "epoch": 1 }, { "type": "loss", "content": 0.02554660104215145, "timestamp": "2025-09-10 02:17:53.394539", "step": 933, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:17:53.425626", "step": 933, "epoch": 1 }, { "type": "loss", "content": 0.018056869506835938, "timestamp": "2025-09-10 02:17:53.437773", "step": 934, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:17:53.468052", "step": 934, "epoch": 1 }, { "type": "loss", "content": 0.039137158542871475, "timestamp": "2025-09-10 02:17:53.475535", "step": 935, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:17:53.505811", "step": 935, "epoch": 1 }, { "type": "loss", "content": 0.03655305504798889, "timestamp": "2025-09-10 02:17:53.530566", "step": 936, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:17:53.563734", "step": 936, "epoch": 1 }, { "type": "loss", "content": 0.02264043502509594, "timestamp": "2025-09-10 02:17:53.571984", "step": 937, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:17:53.603803", "step": 937, "epoch": 1 }, { "type": "loss", "content": 0.0072896406054496765, "timestamp": "2025-09-10 02:17:53.607995", "step": 938, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:17:53.639895", "step": 938, "epoch": 1 }, { "type": "loss", "content": 0.01063856016844511, "timestamp": "2025-09-10 02:17:53.646943", "step": 939, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:17:53.677247", "step": 939, "epoch": 1 }, { "type": "loss", "content": 0.012549477629363537, "timestamp": "2025-09-10 02:17:53.705519", "step": 940, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-10 02:17:53.742721", "step": 940, "epoch": 1 }, { "type": "loss", "content": 0.007854852825403214, "timestamp": "2025-09-10 02:17:53.758153", "step": 941, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:17:53.789869", "step": 941, "epoch": 1 }, { "type": "loss", "content": 0.008021929301321507, "timestamp": "2025-09-10 02:17:53.797362", "step": 942, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:17:53.828747", "step": 942, "epoch": 1 }, { "type": "loss", "content": 0.03075227700173855, "timestamp": "2025-09-10 02:17:53.836399", "step": 943, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:17:53.867457", "step": 943, "epoch": 1 }, { "type": "loss", "content": 0.012394532561302185, "timestamp": "2025-09-10 02:17:53.892784", "step": 944, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:17:53.924195", "step": 944, "epoch": 1 }, { "type": "loss", "content": 0.025795314460992813, "timestamp": "2025-09-10 02:17:53.928702", "step": 945, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:17:53.965401", "step": 945, "epoch": 1 }, { "type": "loss", "content": 0.03643295168876648, "timestamp": "2025-09-10 02:17:53.972943", "step": 946, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:17:54.012422", "step": 946, "epoch": 1 }, { "type": "loss", "content": 0.01099133025854826, "timestamp": "2025-09-10 02:17:54.019892", "step": 947, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:17:54.055067", "step": 947, "epoch": 1 }, { "type": "loss", "content": 0.010035491548478603, "timestamp": "2025-09-10 02:17:54.082908", "step": 948, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:17:54.130125", "step": 948, "epoch": 1 }, { "type": "loss", "content": 0.018783031031489372, "timestamp": "2025-09-10 02:17:54.135462", "step": 949, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:17:54.172827", "step": 949, "epoch": 1 }, { "type": "loss", "content": 0.0065679592080414295, "timestamp": "2025-09-10 02:17:54.179688", "step": 950, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:17:54.227461", "step": 950, "epoch": 1 }, { "type": "loss", "content": 0.030873224139213562, "timestamp": "2025-09-10 02:17:54.234850", "step": 951, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:17:54.267001", "step": 951, "epoch": 1 }, { "type": "loss", "content": 0.00860142894089222, "timestamp": "2025-09-10 02:17:54.298805", "step": 952, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:17:54.329849", "step": 952, "epoch": 1 }, { "type": "loss", "content": 0.02184119261801243, "timestamp": "2025-09-10 02:17:54.334802", "step": 953, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:17:54.365625", "step": 953, "epoch": 1 }, { "type": "loss", "content": 0.009527009911835194, "timestamp": "2025-09-10 02:17:54.375907", "step": 954, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:17:54.408819", "step": 954, "epoch": 1 }, { "type": "loss", "content": 0.020068276673555374, "timestamp": "2025-09-10 02:17:54.422151", "step": 955, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:17:54.457236", "step": 955, "epoch": 1 }, { "type": "loss", "content": 0.010814903303980827, "timestamp": "2025-09-10 02:17:54.488267", "step": 956, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:17:54.520071", "step": 956, "epoch": 1 }, { "type": "loss", "content": 0.02371845953166485, "timestamp": "2025-09-10 02:17:54.525943", "step": 957, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:17:54.565905", "step": 957, "epoch": 1 }, { "type": "loss", "content": 0.029727578163146973, "timestamp": "2025-09-10 02:17:54.571947", "step": 958, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:17:54.603808", "step": 958, "epoch": 1 }, { "type": "loss", "content": 0.05049288645386696, "timestamp": "2025-09-10 02:17:54.614183", "step": 959, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:17:54.646310", "step": 959, "epoch": 1 }, { "type": "loss", "content": 0.00983439851552248, "timestamp": "2025-09-10 02:17:54.671570", "step": 960, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:17:54.705032", "step": 960, "epoch": 1 }, { "type": "loss", "content": 0.02309414930641651, "timestamp": "2025-09-10 02:17:54.711307", "step": 961, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-10 02:17:54.751183", "step": 961, "epoch": 1 }, { "type": "loss", "content": 0.0041077896021306515, "timestamp": "2025-09-10 02:17:54.765144", "step": 962, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:17:54.798115", "step": 962, "epoch": 1 }, { "type": "loss", "content": 0.0642273798584938, "timestamp": "2025-09-10 02:17:54.810293", "step": 963, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-10 02:17:54.845738", "step": 963, "epoch": 1 }, { "type": "loss", "content": 0.007594208233058453, "timestamp": "2025-09-10 02:17:54.880449", "step": 964, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:17:54.910901", "step": 964, "epoch": 1 }, { "type": "loss", "content": 0.021292701363563538, "timestamp": "2025-09-10 02:17:54.915566", "step": 965, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:17:54.947901", "step": 965, "epoch": 1 }, { "type": "loss", "content": 0.00916915200650692, "timestamp": "2025-09-10 02:17:54.951819", "step": 966, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:17:54.983075", "step": 966, "epoch": 1 }, { "type": "loss", "content": 0.006665355525910854, "timestamp": "2025-09-10 02:17:54.990165", "step": 967, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:17:55.025106", "step": 967, "epoch": 1 }, { "type": "loss", "content": 0.012122230604290962, "timestamp": "2025-09-10 02:17:55.058652", "step": 968, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 576 ], "flops": 17085996872448 }, "timestamp": "2025-09-10 02:17:55.103815", "step": 968, "epoch": 1 }, { "type": "loss", "content": 0.0122977988794446, "timestamp": "2025-09-10 02:17:55.123080", "step": 969, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:17:55.154056", "step": 969, "epoch": 1 }, { "type": "loss", "content": 0.00949710514396429, "timestamp": "2025-09-10 02:17:55.164111", "step": 970, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:17:55.194022", "step": 970, "epoch": 1 }, { "type": "loss", "content": 0.018477456644177437, "timestamp": "2025-09-10 02:17:55.198355", "step": 971, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:17:55.229368", "step": 971, "epoch": 1 }, { "type": "loss", "content": 0.014558763243258, "timestamp": "2025-09-10 02:17:55.257943", "step": 972, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:17:55.294468", "step": 972, "epoch": 1 }, { "type": "loss", "content": 0.008184276521205902, "timestamp": "2025-09-10 02:17:55.307496", "step": 973, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 144 ], "flops": 4271696270016 }, "timestamp": "2025-09-10 02:17:55.343894", "step": 973, "epoch": 1 }, { "type": "loss", "content": 0.017617663368582726, "timestamp": "2025-09-10 02:17:55.349012", "step": 974, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:17:55.385405", "step": 974, "epoch": 1 }, { "type": "loss", "content": 0.013045444153249264, "timestamp": "2025-09-10 02:17:55.389583", "step": 975, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:17:55.421516", "step": 975, "epoch": 1 }, { "type": "loss", "content": 0.02563711628317833, "timestamp": "2025-09-10 02:17:55.451436", "step": 976, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:17:55.481948", "step": 976, "epoch": 1 }, { "type": "loss", "content": 0.013601159676909447, "timestamp": "2025-09-10 02:17:55.484011", "step": 977, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 144 ], "flops": 4271696270016 }, "timestamp": "2025-09-10 02:17:55.514755", "step": 977, "epoch": 1 }, { "type": "loss", "content": 0.026903489604592323, "timestamp": "2025-09-10 02:17:55.517153", "step": 978, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:17:55.548412", "step": 978, "epoch": 1 }, { "type": "loss", "content": 0.008285568095743656, "timestamp": "2025-09-10 02:17:55.552794", "step": 979, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:17:55.584495", "step": 979, "epoch": 1 }, { "type": "loss", "content": 0.008680049329996109, "timestamp": "2025-09-10 02:17:55.615365", "step": 980, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:17:55.646309", "step": 980, "epoch": 1 }, { "type": "loss", "content": 0.004374523181468248, "timestamp": "2025-09-10 02:17:55.651294", "step": 981, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:17:55.682655", "step": 981, "epoch": 1 }, { "type": "loss", "content": 0.018112564459443092, "timestamp": "2025-09-10 02:17:55.693510", "step": 982, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:17:55.723932", "step": 982, "epoch": 1 }, { "type": "loss", "content": 0.017343124374747276, "timestamp": "2025-09-10 02:17:55.728098", "step": 983, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:17:55.757844", "step": 983, "epoch": 1 }, { "type": "loss", "content": 0.009760797023773193, "timestamp": "2025-09-10 02:17:55.785672", "step": 984, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 144 ], "flops": 4271696270016 }, "timestamp": "2025-09-10 02:17:55.816268", "step": 984, "epoch": 1 }, { "type": "loss", "content": 0.010807998478412628, "timestamp": "2025-09-10 02:17:55.819473", "step": 985, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:17:55.853423", "step": 985, "epoch": 1 }, { "type": "loss", "content": 0.024396957829594612, "timestamp": "2025-09-10 02:17:55.856074", "step": 986, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:17:55.887069", "step": 986, "epoch": 1 }, { "type": "loss", "content": 0.02711336500942707, "timestamp": "2025-09-10 02:17:55.894496", "step": 987, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:17:55.931416", "step": 987, "epoch": 1 }, { "type": "loss", "content": 0.006434720940887928, "timestamp": "2025-09-10 02:17:55.965598", "step": 988, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:17:56.000656", "step": 988, "epoch": 1 }, { "type": "loss", "content": 0.02933250367641449, "timestamp": "2025-09-10 02:17:56.002861", "step": 989, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:17:56.035652", "step": 989, "epoch": 1 }, { "type": "loss", "content": 0.04050236940383911, "timestamp": "2025-09-10 02:17:56.047816", "step": 990, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:17:56.083529", "step": 990, "epoch": 1 }, { "type": "loss", "content": 0.03573581948876381, "timestamp": "2025-09-10 02:17:56.090451", "step": 991, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:17:56.137426", "step": 991, "epoch": 1 }, { "type": "loss", "content": 0.013186642900109291, "timestamp": "2025-09-10 02:17:56.165631", "step": 992, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:17:56.198381", "step": 992, "epoch": 1 }, { "type": "loss", "content": 0.00814458541572094, "timestamp": "2025-09-10 02:17:56.202916", "step": 993, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:17:56.235284", "step": 993, "epoch": 1 }, { "type": "loss", "content": 0.04362935200333595, "timestamp": "2025-09-10 02:17:56.239732", "step": 994, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:17:56.272860", "step": 994, "epoch": 1 }, { "type": "loss", "content": 0.005004457198083401, "timestamp": "2025-09-10 02:17:56.280773", "step": 995, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:17:56.312241", "step": 995, "epoch": 1 }, { "type": "loss", "content": 0.026401042938232422, "timestamp": "2025-09-10 02:17:56.340827", "step": 996, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:17:56.371718", "step": 996, "epoch": 1 }, { "type": "loss", "content": 0.014147581532597542, "timestamp": "2025-09-10 02:17:56.381443", "step": 997, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:17:56.411960", "step": 997, "epoch": 1 }, { "type": "loss", "content": 0.005374276544898748, "timestamp": "2025-09-10 02:17:56.419394", "step": 998, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:17:56.450530", "step": 998, "epoch": 1 }, { "type": "loss", "content": 0.00914605613797903, "timestamp": "2025-09-10 02:17:56.457781", "step": 999, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:17:56.489128", "step": 999, "epoch": 1 }, { "type": "loss", "content": 0.01612034998834133, "timestamp": "2025-09-10 02:17:56.522591", "step": 1000, "epoch": 1 }, { "type": "info", "content": "Checkpoint saved at step 1000", "timestamp": "2025-09-10 02:18:01.459465", "step": 1000, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:18:01.491383", "step": 1000, "epoch": 1 }, { "type": "loss", "content": 0.0174104031175375, "timestamp": "2025-09-10 02:18:01.494305", "step": 1001, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:18:01.528010", "step": 1001, "epoch": 1 }, { "type": "loss", "content": 0.026182083413004875, "timestamp": "2025-09-10 02:18:01.539594", "step": 1002, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:18:01.570881", "step": 1002, "epoch": 1 }, { "type": "loss", "content": 0.03149298205971718, "timestamp": "2025-09-10 02:18:01.574600", "step": 1003, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:18:01.606718", "step": 1003, "epoch": 1 }, { "type": "loss", "content": 0.02260902337729931, "timestamp": "2025-09-10 02:18:01.637592", "step": 1004, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:18:01.668452", "step": 1004, "epoch": 1 }, { "type": "loss", "content": 0.009301579557359219, "timestamp": "2025-09-10 02:18:01.676113", "step": 1005, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:18:01.707010", "step": 1005, "epoch": 1 }, { "type": "loss", "content": 0.017554203048348427, "timestamp": "2025-09-10 02:18:01.714003", "step": 1006, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:18:01.744576", "step": 1006, "epoch": 1 }, { "type": "loss", "content": 0.02697034180164337, "timestamp": "2025-09-10 02:18:01.748615", "step": 1007, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:18:01.778798", "step": 1007, "epoch": 1 }, { "type": "loss", "content": 0.013858512975275517, "timestamp": "2025-09-10 02:18:01.806623", "step": 1008, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:18:01.837166", "step": 1008, "epoch": 1 }, { "type": "loss", "content": 0.01846943609416485, "timestamp": "2025-09-10 02:18:01.842178", "step": 1009, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:18:01.872899", "step": 1009, "epoch": 1 }, { "type": "loss", "content": 0.028651878237724304, "timestamp": "2025-09-10 02:18:01.879770", "step": 1010, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:18:01.910494", "step": 1010, "epoch": 1 }, { "type": "loss", "content": 0.015424097888171673, "timestamp": "2025-09-10 02:18:01.920098", "step": 1011, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:18:01.954475", "step": 1011, "epoch": 1 }, { "type": "loss", "content": 0.03452470153570175, "timestamp": "2025-09-10 02:18:01.979541", "step": 1012, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:18:02.009239", "step": 1012, "epoch": 1 }, { "type": "loss", "content": 0.01232621818780899, "timestamp": "2025-09-10 02:18:02.011397", "step": 1013, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:18:02.040787", "step": 1013, "epoch": 1 }, { "type": "loss", "content": 0.006808358710259199, "timestamp": "2025-09-10 02:18:02.045323", "step": 1014, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:18:02.076071", "step": 1014, "epoch": 1 }, { "type": "loss", "content": 0.011624851264059544, "timestamp": "2025-09-10 02:18:02.086817", "step": 1015, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:18:02.117025", "step": 1015, "epoch": 1 }, { "type": "loss", "content": 0.024632567539811134, "timestamp": "2025-09-10 02:18:02.145882", "step": 1016, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:18:02.176481", "step": 1016, "epoch": 1 }, { "type": "loss", "content": 0.017971431836485863, "timestamp": "2025-09-10 02:18:02.181117", "step": 1017, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:18:02.214734", "step": 1017, "epoch": 1 }, { "type": "loss", "content": 0.0073992046527564526, "timestamp": "2025-09-10 02:18:02.225624", "step": 1018, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-10 02:18:02.263494", "step": 1018, "epoch": 1 }, { "type": "loss", "content": 0.01738920249044895, "timestamp": "2025-09-10 02:18:02.279509", "step": 1019, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:18:02.310208", "step": 1019, "epoch": 1 }, { "type": "loss", "content": 0.010011442936956882, "timestamp": "2025-09-10 02:18:02.333868", "step": 1020, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 144 ], "flops": 4271696270016 }, "timestamp": "2025-09-10 02:18:02.363806", "step": 1020, "epoch": 1 }, { "type": "loss", "content": 0.01954047754406929, "timestamp": "2025-09-10 02:18:02.366349", "step": 1021, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:18:02.396560", "step": 1021, "epoch": 1 }, { "type": "loss", "content": 0.021268155425786972, "timestamp": "2025-09-10 02:18:02.403748", "step": 1022, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:18:02.434350", "step": 1022, "epoch": 1 }, { "type": "loss", "content": 0.023624001070857048, "timestamp": "2025-09-10 02:18:02.446757", "step": 1023, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-10 02:18:02.487085", "step": 1023, "epoch": 1 }, { "type": "loss", "content": 0.025291163474321365, "timestamp": "2025-09-10 02:18:02.517046", "step": 1024, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-10 02:18:02.552710", "step": 1024, "epoch": 1 }, { "type": "loss", "content": 0.014068282209336758, "timestamp": "2025-09-10 02:18:02.568165", "step": 1025, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:18:02.602462", "step": 1025, "epoch": 1 }, { "type": "loss", "content": 0.01854473166167736, "timestamp": "2025-09-10 02:18:02.615856", "step": 1026, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:18:02.648053", "step": 1026, "epoch": 1 }, { "type": "loss", "content": 0.007462997920811176, "timestamp": "2025-09-10 02:18:02.660020", "step": 1027, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-10 02:18:02.695010", "step": 1027, "epoch": 1 }, { "type": "loss", "content": 0.008165022358298302, "timestamp": "2025-09-10 02:18:02.729907", "step": 1028, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:18:02.759893", "step": 1028, "epoch": 1 }, { "type": "loss", "content": 0.00575115904211998, "timestamp": "2025-09-10 02:18:02.768441", "step": 1029, "epoch": 1 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 736 ], "batch_size": 8, "flops": 14554433988352 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 480 ], "batch_size": 8, "flops": 9492022189824 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 544 ], "batch_size": 8, "flops": 10757625139456 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 624 ], "batch_size": 8, "flops": 12339628826496 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 576 ], "batch_size": 8, "flops": 11390426614272 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 432 ], "batch_size": 8, "flops": 8542819977600 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 432 ], "batch_size": 8, "flops": 8542819977600 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 128 ], "batch_size": 8, "flops": 2531205966848 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 544 ], "batch_size": 8, "flops": 10757625139456 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 656 ], "batch_size": 8, "flops": 12972430301312 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 128 ], "batch_size": 8, "flops": 2531205966848 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 464 ], "batch_size": 8, "flops": 9175621452416 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 560 ], "batch_size": 8, "flops": 11074025876864 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 576 ], "batch_size": 8, "flops": 11390426614272 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 1168 ], "batch_size": 8, "flops": 23097253898368 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 640 ], "batch_size": 8, "flops": 12656029563904 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 6, 208 ], "batch_size": 8, "flops": 4113209653888 } ], "timestamp": "2025-09-10 02:18:12.958862", "step": 1029, "epoch": 1 }, { "type": "pplx", "content": 12598956.534986155, "timestamp": "2025-09-10 02:18:12.961843", "step": 1029, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:18:12.993434", "step": 1029, "epoch": 1 }, { "type": "loss", "content": 0.025716153904795647, "timestamp": "2025-09-10 02:18:13.001709", "step": 1030, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:18:13.032761", "step": 1030, "epoch": 1 }, { "type": "loss", "content": 0.011754123494029045, "timestamp": "2025-09-10 02:18:13.037008", "step": 1031, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:18:13.067369", "step": 1031, "epoch": 1 }, { "type": "loss", "content": 0.01727590523660183, "timestamp": "2025-09-10 02:18:13.091208", "step": 1032, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:18:13.122124", "step": 1032, "epoch": 1 }, { "type": "loss", "content": 0.014728769659996033, "timestamp": "2025-09-10 02:18:13.126760", "step": 1033, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:18:13.157423", "step": 1033, "epoch": 1 }, { "type": "loss", "content": 0.01519166398793459, "timestamp": "2025-09-10 02:18:13.160059", "step": 1034, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:18:13.190660", "step": 1034, "epoch": 1 }, { "type": "loss", "content": 0.003654760541394353, "timestamp": "2025-09-10 02:18:13.198324", "step": 1035, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:18:13.229987", "step": 1035, "epoch": 1 }, { "type": "loss", "content": 0.013432272709906101, "timestamp": "2025-09-10 02:18:13.257838", "step": 1036, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:18:13.289765", "step": 1036, "epoch": 1 }, { "type": "loss", "content": 0.017557019367814064, "timestamp": "2025-09-10 02:18:13.300314", "step": 1037, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:18:13.331706", "step": 1037, "epoch": 1 }, { "type": "loss", "content": 0.008299489505589008, "timestamp": "2025-09-10 02:18:13.335575", "step": 1038, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:18:13.366588", "step": 1038, "epoch": 1 }, { "type": "loss", "content": 0.008687845431268215, "timestamp": "2025-09-10 02:18:13.371045", "step": 1039, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:18:13.402670", "step": 1039, "epoch": 1 }, { "type": "loss", "content": 0.023219764232635498, "timestamp": "2025-09-10 02:18:13.435613", "step": 1040, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-10 02:18:13.469380", "step": 1040, "epoch": 1 }, { "type": "loss", "content": 0.007147731725126505, "timestamp": "2025-09-10 02:18:13.482656", "step": 1041, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:18:13.515176", "step": 1041, "epoch": 1 }, { "type": "loss", "content": 0.025176668539643288, "timestamp": "2025-09-10 02:18:13.525281", "step": 1042, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:18:13.556759", "step": 1042, "epoch": 1 }, { "type": "loss", "content": 0.030310701578855515, "timestamp": "2025-09-10 02:18:13.564096", "step": 1043, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:18:13.595495", "step": 1043, "epoch": 1 }, { "type": "loss", "content": 0.004898954648524523, "timestamp": "2025-09-10 02:18:13.623811", "step": 1044, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:18:13.654800", "step": 1044, "epoch": 1 }, { "type": "loss", "content": 0.0060010491870343685, "timestamp": "2025-09-10 02:18:13.664468", "step": 1045, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 432 ], "flops": 12814563338304 }, "timestamp": "2025-09-10 02:18:13.704827", "step": 1045, "epoch": 1 }, { "type": "loss", "content": 0.00663131894543767, "timestamp": "2025-09-10 02:18:13.721022", "step": 1046, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:18:13.752850", "step": 1046, "epoch": 1 }, { "type": "loss", "content": 0.004566808696836233, "timestamp": "2025-09-10 02:18:13.760384", "step": 1047, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:18:13.791912", "step": 1047, "epoch": 1 }, { "type": "loss", "content": 0.034333836287260056, "timestamp": "2025-09-10 02:18:13.819760", "step": 1048, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:18:13.853412", "step": 1048, "epoch": 1 }, { "type": "loss", "content": 0.02081063576042652, "timestamp": "2025-09-10 02:18:13.863158", "step": 1049, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:18:13.895760", "step": 1049, "epoch": 1 }, { "type": "loss", "content": 0.02265256643295288, "timestamp": "2025-09-10 02:18:13.903213", "step": 1050, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:18:13.936643", "step": 1050, "epoch": 1 }, { "type": "loss", "content": 0.023109683766961098, "timestamp": "2025-09-10 02:18:13.946822", "step": 1051, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:18:13.981992", "step": 1051, "epoch": 1 }, { "type": "loss", "content": 0.008987885899841785, "timestamp": "2025-09-10 02:18:14.016298", "step": 1052, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:18:14.049436", "step": 1052, "epoch": 1 }, { "type": "loss", "content": 0.015795622020959854, "timestamp": "2025-09-10 02:18:14.051658", "step": 1053, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:18:14.086452", "step": 1053, "epoch": 1 }, { "type": "loss", "content": 0.0090614790096879, "timestamp": "2025-09-10 02:18:14.099780", "step": 1054, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:18:14.131728", "step": 1054, "epoch": 1 }, { "type": "loss", "content": 0.0187073964625597, "timestamp": "2025-09-10 02:18:14.141814", "step": 1055, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:18:14.174157", "step": 1055, "epoch": 1 }, { "type": "loss", "content": 0.02702743373811245, "timestamp": "2025-09-10 02:18:14.202776", "step": 1056, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:18:14.233455", "step": 1056, "epoch": 1 }, { "type": "loss", "content": 0.02460920810699463, "timestamp": "2025-09-10 02:18:14.239029", "step": 1057, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:18:14.270798", "step": 1057, "epoch": 1 }, { "type": "loss", "content": 0.009660584852099419, "timestamp": "2025-09-10 02:18:14.278697", "step": 1058, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:18:14.310361", "step": 1058, "epoch": 1 }, { "type": "loss", "content": 0.020776310935616493, "timestamp": "2025-09-10 02:18:14.318047", "step": 1059, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:18:14.349936", "step": 1059, "epoch": 1 }, { "type": "loss", "content": 0.02294449508190155, "timestamp": "2025-09-10 02:18:14.378601", "step": 1060, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:18:14.410728", "step": 1060, "epoch": 1 }, { "type": "loss", "content": 0.017590373754501343, "timestamp": "2025-09-10 02:18:14.413245", "step": 1061, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:18:14.448846", "step": 1061, "epoch": 1 }, { "type": "loss", "content": 0.006449908018112183, "timestamp": "2025-09-10 02:18:14.462548", "step": 1062, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:18:14.494747", "step": 1062, "epoch": 1 }, { "type": "loss", "content": 0.03565583750605583, "timestamp": "2025-09-10 02:18:14.505002", "step": 1063, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 480 ], "flops": 14238374516352 }, "timestamp": "2025-09-10 02:18:14.547850", "step": 1063, "epoch": 1 }, { "type": "loss", "content": 0.0327337309718132, "timestamp": "2025-09-10 02:18:14.586097", "step": 1064, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:18:14.617996", "step": 1064, "epoch": 1 }, { "type": "loss", "content": 0.02565723843872547, "timestamp": "2025-09-10 02:18:14.622747", "step": 1065, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:18:14.654346", "step": 1065, "epoch": 1 }, { "type": "loss", "content": 0.009299799799919128, "timestamp": "2025-09-10 02:18:14.661114", "step": 1066, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:18:14.692825", "step": 1066, "epoch": 1 }, { "type": "loss", "content": 0.014539425261318684, "timestamp": "2025-09-10 02:18:14.699563", "step": 1067, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:18:14.731919", "step": 1067, "epoch": 1 }, { "type": "loss", "content": 0.0058663212694227695, "timestamp": "2025-09-10 02:18:14.762906", "step": 1068, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:18:14.797242", "step": 1068, "epoch": 1 }, { "type": "loss", "content": 0.013086764141917229, "timestamp": "2025-09-10 02:18:14.802463", "step": 1069, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:18:14.833918", "step": 1069, "epoch": 1 }, { "type": "loss", "content": 0.006901100277900696, "timestamp": "2025-09-10 02:18:14.841018", "step": 1070, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:18:14.871397", "step": 1070, "epoch": 1 }, { "type": "loss", "content": 0.03987196460366249, "timestamp": "2025-09-10 02:18:14.881603", "step": 1071, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:18:14.913411", "step": 1071, "epoch": 1 }, { "type": "loss", "content": 0.019523393362760544, "timestamp": "2025-09-10 02:18:14.941939", "step": 1072, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:18:14.972877", "step": 1072, "epoch": 1 }, { "type": "loss", "content": 0.008477416820824146, "timestamp": "2025-09-10 02:18:14.978088", "step": 1073, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:18:15.009439", "step": 1073, "epoch": 1 }, { "type": "loss", "content": 0.00965914037078619, "timestamp": "2025-09-10 02:18:15.016235", "step": 1074, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:18:15.047147", "step": 1074, "epoch": 1 }, { "type": "loss", "content": 0.030519306659698486, "timestamp": "2025-09-10 02:18:15.054225", "step": 1075, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:18:15.084514", "step": 1075, "epoch": 1 }, { "type": "loss", "content": 0.006563273724168539, "timestamp": "2025-09-10 02:18:15.109753", "step": 1076, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:18:15.140018", "step": 1076, "epoch": 1 }, { "type": "loss", "content": 0.0034011027310043573, "timestamp": "2025-09-10 02:18:15.142293", "step": 1077, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:18:15.173398", "step": 1077, "epoch": 1 }, { "type": "loss", "content": 0.00994145218282938, "timestamp": "2025-09-10 02:18:15.180346", "step": 1078, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:18:15.211068", "step": 1078, "epoch": 1 }, { "type": "loss", "content": 0.00793201569467783, "timestamp": "2025-09-10 02:18:15.218799", "step": 1079, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:18:15.249649", "step": 1079, "epoch": 1 }, { "type": "loss", "content": 0.006165057886391878, "timestamp": "2025-09-10 02:18:15.278386", "step": 1080, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:18:15.309899", "step": 1080, "epoch": 1 }, { "type": "loss", "content": 0.017197439447045326, "timestamp": "2025-09-10 02:18:15.314546", "step": 1081, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-10 02:18:15.348968", "step": 1081, "epoch": 1 }, { "type": "loss", "content": 0.011048262938857079, "timestamp": "2025-09-10 02:18:15.362821", "step": 1082, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:18:15.395062", "step": 1082, "epoch": 1 }, { "type": "loss", "content": 0.016790146008133888, "timestamp": "2025-09-10 02:18:15.402803", "step": 1083, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:18:15.434194", "step": 1083, "epoch": 1 }, { "type": "loss", "content": 0.013046172447502613, "timestamp": "2025-09-10 02:18:15.462016", "step": 1084, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:18:15.493866", "step": 1084, "epoch": 1 }, { "type": "loss", "content": 0.015779945999383926, "timestamp": "2025-09-10 02:18:15.501637", "step": 1085, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:18:15.533252", "step": 1085, "epoch": 1 }, { "type": "loss", "content": 0.021666022017598152, "timestamp": "2025-09-10 02:18:15.540188", "step": 1086, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:18:15.571241", "step": 1086, "epoch": 1 }, { "type": "loss", "content": 0.007535271812230349, "timestamp": "2025-09-10 02:18:15.581785", "step": 1087, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:18:15.614685", "step": 1087, "epoch": 1 }, { "type": "loss", "content": 0.012627107091248035, "timestamp": "2025-09-10 02:18:15.645687", "step": 1088, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:18:15.676355", "step": 1088, "epoch": 1 }, { "type": "loss", "content": 0.021864308044314384, "timestamp": "2025-09-10 02:18:15.681456", "step": 1089, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:18:15.713779", "step": 1089, "epoch": 1 }, { "type": "loss", "content": 0.008793273940682411, "timestamp": "2025-09-10 02:18:15.724719", "step": 1090, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:18:15.756581", "step": 1090, "epoch": 1 }, { "type": "loss", "content": 0.0030675516463816166, "timestamp": "2025-09-10 02:18:15.763522", "step": 1091, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:18:15.794568", "step": 1091, "epoch": 1 }, { "type": "loss", "content": 0.009418687783181667, "timestamp": "2025-09-10 02:18:15.826297", "step": 1092, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:18:15.859361", "step": 1092, "epoch": 1 }, { "type": "loss", "content": 0.005299612879753113, "timestamp": "2025-09-10 02:18:15.869132", "step": 1093, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:18:15.899394", "step": 1093, "epoch": 1 }, { "type": "loss", "content": 0.018485212698578835, "timestamp": "2025-09-10 02:18:15.906197", "step": 1094, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:18:15.936910", "step": 1094, "epoch": 1 }, { "type": "loss", "content": 0.019424965605139732, "timestamp": "2025-09-10 02:18:15.947055", "step": 1095, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:18:15.978515", "step": 1095, "epoch": 1 }, { "type": "loss", "content": 0.012880226597189903, "timestamp": "2025-09-10 02:18:16.003377", "step": 1096, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:18:16.034727", "step": 1096, "epoch": 1 }, { "type": "loss", "content": 0.007060025352984667, "timestamp": "2025-09-10 02:18:16.038943", "step": 1097, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:18:16.069518", "step": 1097, "epoch": 1 }, { "type": "loss", "content": 0.0054059443064033985, "timestamp": "2025-09-10 02:18:16.082033", "step": 1098, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-10 02:18:16.122331", "step": 1098, "epoch": 1 }, { "type": "loss", "content": 0.018658744171261787, "timestamp": "2025-09-10 02:18:16.138003", "step": 1099, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:18:16.168621", "step": 1099, "epoch": 1 }, { "type": "loss", "content": 0.003897774498909712, "timestamp": "2025-09-10 02:18:16.193726", "step": 1100, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:18:16.224671", "step": 1100, "epoch": 1 }, { "type": "loss", "content": 0.009347101673483849, "timestamp": "2025-09-10 02:18:16.230213", "step": 1101, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:18:16.260845", "step": 1101, "epoch": 1 }, { "type": "loss", "content": 0.03616241365671158, "timestamp": "2025-09-10 02:18:16.264931", "step": 1102, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:18:16.298967", "step": 1102, "epoch": 1 }, { "type": "loss", "content": 0.006681836675852537, "timestamp": "2025-09-10 02:18:16.312335", "step": 1103, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:18:16.343253", "step": 1103, "epoch": 1 }, { "type": "loss", "content": 0.008854770101606846, "timestamp": "2025-09-10 02:18:16.374312", "step": 1104, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:18:16.405424", "step": 1104, "epoch": 1 }, { "type": "loss", "content": 0.004546549171209335, "timestamp": "2025-09-10 02:18:16.407728", "step": 1105, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:18:16.437927", "step": 1105, "epoch": 1 }, { "type": "loss", "content": 0.010041974484920502, "timestamp": "2025-09-10 02:18:16.440422", "step": 1106, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:18:16.474803", "step": 1106, "epoch": 1 }, { "type": "loss", "content": 0.0157835241407156, "timestamp": "2025-09-10 02:18:16.488466", "step": 1107, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:18:16.520358", "step": 1107, "epoch": 1 }, { "type": "loss", "content": 0.0020716842263936996, "timestamp": "2025-09-10 02:18:16.553346", "step": 1108, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:18:16.584568", "step": 1108, "epoch": 1 }, { "type": "loss", "content": 0.029057949781417847, "timestamp": "2025-09-10 02:18:16.588795", "step": 1109, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:18:16.621696", "step": 1109, "epoch": 1 }, { "type": "loss", "content": 0.02135612629354, "timestamp": "2025-09-10 02:18:16.629582", "step": 1110, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:18:16.660800", "step": 1110, "epoch": 1 }, { "type": "loss", "content": 0.0028702733106911182, "timestamp": "2025-09-10 02:18:16.665292", "step": 1111, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:18:16.695943", "step": 1111, "epoch": 1 }, { "type": "loss", "content": 0.013597295619547367, "timestamp": "2025-09-10 02:18:16.720928", "step": 1112, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:18:16.751987", "step": 1112, "epoch": 1 }, { "type": "loss", "content": 0.0027478632982820272, "timestamp": "2025-09-10 02:18:16.756533", "step": 1113, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:18:16.787835", "step": 1113, "epoch": 1 }, { "type": "loss", "content": 0.0023722779005765915, "timestamp": "2025-09-10 02:18:16.794785", "step": 1114, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:18:16.826397", "step": 1114, "epoch": 1 }, { "type": "loss", "content": 0.014487197622656822, "timestamp": "2025-09-10 02:18:16.833719", "step": 1115, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:18:16.866648", "step": 1115, "epoch": 1 }, { "type": "loss", "content": 0.015417618677020073, "timestamp": "2025-09-10 02:18:16.895241", "step": 1116, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:18:16.927334", "step": 1116, "epoch": 1 }, { "type": "loss", "content": 0.005695224739611149, "timestamp": "2025-09-10 02:18:16.929586", "step": 1117, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:18:16.959925", "step": 1117, "epoch": 1 }, { "type": "loss", "content": 0.04347721487283707, "timestamp": "2025-09-10 02:18:16.964520", "step": 1118, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:18:16.998114", "step": 1118, "epoch": 1 }, { "type": "loss", "content": 0.010189319029450417, "timestamp": "2025-09-10 02:18:17.002800", "step": 1119, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:18:17.034445", "step": 1119, "epoch": 1 }, { "type": "loss", "content": 0.020940367132425308, "timestamp": "2025-09-10 02:18:17.063030", "step": 1120, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:18:17.094110", "step": 1120, "epoch": 1 }, { "type": "loss", "content": 0.004970818292349577, "timestamp": "2025-09-10 02:18:17.098676", "step": 1121, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:18:17.131615", "step": 1121, "epoch": 1 }, { "type": "loss", "content": 0.004925449378788471, "timestamp": "2025-09-10 02:18:17.135945", "step": 1122, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:18:17.167238", "step": 1122, "epoch": 1 }, { "type": "loss", "content": 0.0076041617430746555, "timestamp": "2025-09-10 02:18:17.174914", "step": 1123, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:18:17.205923", "step": 1123, "epoch": 1 }, { "type": "loss", "content": 0.004425295628607273, "timestamp": "2025-09-10 02:18:17.234476", "step": 1124, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:18:17.266629", "step": 1124, "epoch": 1 }, { "type": "loss", "content": 0.022850140929222107, "timestamp": "2025-09-10 02:18:17.279343", "step": 1125, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:18:17.310177", "step": 1125, "epoch": 1 }, { "type": "loss", "content": 0.008794148452579975, "timestamp": "2025-09-10 02:18:17.317476", "step": 1126, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:18:17.347814", "step": 1126, "epoch": 1 }, { "type": "loss", "content": 0.02646883763372898, "timestamp": "2025-09-10 02:18:17.351956", "step": 1127, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:18:17.382198", "step": 1127, "epoch": 1 }, { "type": "loss", "content": 0.015476626344025135, "timestamp": "2025-09-10 02:18:17.410749", "step": 1128, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:18:17.443000", "step": 1128, "epoch": 1 }, { "type": "loss", "content": 0.0023650110233575106, "timestamp": "2025-09-10 02:18:17.455617", "step": 1129, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:18:17.487098", "step": 1129, "epoch": 1 }, { "type": "loss", "content": 0.0014028213918209076, "timestamp": "2025-09-10 02:18:17.494611", "step": 1130, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:18:17.525210", "step": 1130, "epoch": 1 }, { "type": "loss", "content": 0.006255102809518576, "timestamp": "2025-09-10 02:18:17.535283", "step": 1131, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:18:17.566091", "step": 1131, "epoch": 1 }, { "type": "loss", "content": 0.007090611848980188, "timestamp": "2025-09-10 02:18:17.591020", "step": 1132, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:18:17.622925", "step": 1132, "epoch": 1 }, { "type": "loss", "content": 0.015480038709938526, "timestamp": "2025-09-10 02:18:17.627184", "step": 1133, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:18:17.658081", "step": 1133, "epoch": 1 }, { "type": "loss", "content": 0.004326352383941412, "timestamp": "2025-09-10 02:18:17.665220", "step": 1134, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:18:17.696819", "step": 1134, "epoch": 1 }, { "type": "loss", "content": 0.02047579549252987, "timestamp": "2025-09-10 02:18:17.703921", "step": 1135, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:18:17.736305", "step": 1135, "epoch": 1 }, { "type": "loss", "content": 0.0026631599757820368, "timestamp": "2025-09-10 02:18:17.764785", "step": 1136, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-10 02:18:17.800907", "step": 1136, "epoch": 1 }, { "type": "loss", "content": 0.013182473368942738, "timestamp": "2025-09-10 02:18:17.816089", "step": 1137, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:18:17.847937", "step": 1137, "epoch": 1 }, { "type": "loss", "content": 0.010062654502689838, "timestamp": "2025-09-10 02:18:17.852496", "step": 1138, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:18:17.886744", "step": 1138, "epoch": 1 }, { "type": "loss", "content": 0.003278909483924508, "timestamp": "2025-09-10 02:18:17.900412", "step": 1139, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:18:17.930709", "step": 1139, "epoch": 1 }, { "type": "loss", "content": 0.0194843877106905, "timestamp": "2025-09-10 02:18:17.954539", "step": 1140, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:18:17.986665", "step": 1140, "epoch": 1 }, { "type": "loss", "content": 0.002890173811465502, "timestamp": "2025-09-10 02:18:17.992197", "step": 1141, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:18:18.024715", "step": 1141, "epoch": 1 }, { "type": "loss", "content": 0.004366376902908087, "timestamp": "2025-09-10 02:18:18.035599", "step": 1142, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:18:18.066749", "step": 1142, "epoch": 1 }, { "type": "loss", "content": 0.005158752668648958, "timestamp": "2025-09-10 02:18:18.074300", "step": 1143, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:18:18.108596", "step": 1143, "epoch": 1 }, { "type": "loss", "content": 0.009630167856812477, "timestamp": "2025-09-10 02:18:18.137319", "step": 1144, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:18:18.183432", "step": 1144, "epoch": 1 }, { "type": "loss", "content": 0.012025467120110989, "timestamp": "2025-09-10 02:18:18.187852", "step": 1145, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:18:18.219751", "step": 1145, "epoch": 1 }, { "type": "loss", "content": 0.0020432344172149897, "timestamp": "2025-09-10 02:18:18.222161", "step": 1146, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:18:18.253028", "step": 1146, "epoch": 1 }, { "type": "loss", "content": 0.006666641216725111, "timestamp": "2025-09-10 02:18:18.260029", "step": 1147, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:18:18.290912", "step": 1147, "epoch": 1 }, { "type": "loss", "content": 0.007748906966298819, "timestamp": "2025-09-10 02:18:18.316141", "step": 1148, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:18:18.348874", "step": 1148, "epoch": 1 }, { "type": "loss", "content": 0.02841871976852417, "timestamp": "2025-09-10 02:18:18.354345", "step": 1149, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:18:18.385509", "step": 1149, "epoch": 1 }, { "type": "loss", "content": 0.0032413543667644262, "timestamp": "2025-09-10 02:18:18.393360", "step": 1150, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:18:18.423775", "step": 1150, "epoch": 1 }, { "type": "loss", "content": 0.010790413245558739, "timestamp": "2025-09-10 02:18:18.431200", "step": 1151, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:18:18.461383", "step": 1151, "epoch": 1 }, { "type": "loss", "content": 0.017973562702536583, "timestamp": "2025-09-10 02:18:18.494542", "step": 1152, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:18:18.525330", "step": 1152, "epoch": 1 }, { "type": "loss", "content": 0.002686847234144807, "timestamp": "2025-09-10 02:18:18.529764", "step": 1153, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:18:18.562080", "step": 1153, "epoch": 1 }, { "type": "loss", "content": 0.002674214309081435, "timestamp": "2025-09-10 02:18:18.567873", "step": 1154, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:18:18.599988", "step": 1154, "epoch": 1 }, { "type": "loss", "content": 0.0012537644943222404, "timestamp": "2025-09-10 02:18:18.608841", "step": 1155, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:18:18.642028", "step": 1155, "epoch": 1 }, { "type": "loss", "content": 0.011615641415119171, "timestamp": "2025-09-10 02:18:18.669363", "step": 1156, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:18:18.701750", "step": 1156, "epoch": 1 }, { "type": "loss", "content": 0.01832910068333149, "timestamp": "2025-09-10 02:18:18.703887", "step": 1157, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:18:18.735275", "step": 1157, "epoch": 1 }, { "type": "loss", "content": 0.006739361677318811, "timestamp": "2025-09-10 02:18:18.741931", "step": 1158, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:18:18.772936", "step": 1158, "epoch": 1 }, { "type": "loss", "content": 0.01089425478130579, "timestamp": "2025-09-10 02:18:18.779689", "step": 1159, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:18:18.810853", "step": 1159, "epoch": 1 }, { "type": "loss", "content": 0.00913853757083416, "timestamp": "2025-09-10 02:18:18.838352", "step": 1160, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:18:18.874197", "step": 1160, "epoch": 1 }, { "type": "loss", "content": 0.014518055133521557, "timestamp": "2025-09-10 02:18:18.881380", "step": 1161, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:18:18.913054", "step": 1161, "epoch": 1 }, { "type": "loss", "content": 0.006596317049115896, "timestamp": "2025-09-10 02:18:18.920645", "step": 1162, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:18:18.952024", "step": 1162, "epoch": 1 }, { "type": "loss", "content": 0.014293434098362923, "timestamp": "2025-09-10 02:18:18.959596", "step": 1163, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:18:18.991733", "step": 1163, "epoch": 1 }, { "type": "loss", "content": 0.010215037502348423, "timestamp": "2025-09-10 02:18:19.016672", "step": 1164, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:18:19.048796", "step": 1164, "epoch": 1 }, { "type": "loss", "content": 0.030613288283348083, "timestamp": "2025-09-10 02:18:19.053700", "step": 1165, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:18:19.085341", "step": 1165, "epoch": 1 }, { "type": "loss", "content": 0.0022717637475579977, "timestamp": "2025-09-10 02:18:19.089396", "step": 1166, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:18:19.120100", "step": 1166, "epoch": 1 }, { "type": "loss", "content": 0.009593302384018898, "timestamp": "2025-09-10 02:18:19.127529", "step": 1167, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:18:19.159440", "step": 1167, "epoch": 1 }, { "type": "loss", "content": 0.006897877436131239, "timestamp": "2025-09-10 02:18:19.190357", "step": 1168, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-10 02:18:19.227629", "step": 1168, "epoch": 1 }, { "type": "loss", "content": 0.06167227774858475, "timestamp": "2025-09-10 02:18:19.242984", "step": 1169, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:18:19.274562", "step": 1169, "epoch": 1 }, { "type": "loss", "content": 0.04564559459686279, "timestamp": "2025-09-10 02:18:19.278259", "step": 1170, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:18:19.310480", "step": 1170, "epoch": 1 }, { "type": "loss", "content": 0.0009320880053564906, "timestamp": "2025-09-10 02:18:19.317812", "step": 1171, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:18:19.350416", "step": 1171, "epoch": 1 }, { "type": "loss", "content": 0.002427774015814066, "timestamp": "2025-09-10 02:18:19.382777", "step": 1172, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:18:19.416252", "step": 1172, "epoch": 1 }, { "type": "loss", "content": 0.010821384377777576, "timestamp": "2025-09-10 02:18:19.424201", "step": 1173, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:18:19.458546", "step": 1173, "epoch": 1 }, { "type": "loss", "content": 0.025808248668909073, "timestamp": "2025-09-10 02:18:19.462683", "step": 1174, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 432 ], "flops": 12814563338304 }, "timestamp": "2025-09-10 02:18:19.504314", "step": 1174, "epoch": 1 }, { "type": "loss", "content": 0.061185259371995926, "timestamp": "2025-09-10 02:18:19.520458", "step": 1175, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:18:19.551166", "step": 1175, "epoch": 1 }, { "type": "loss", "content": 0.0003842521400656551, "timestamp": "2025-09-10 02:18:19.582991", "step": 1176, "epoch": 1 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 736 ], "batch_size": 8, "flops": 14554433988352 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 480 ], "batch_size": 8, "flops": 9492022189824 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 544 ], "batch_size": 8, "flops": 10757625139456 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 624 ], "batch_size": 8, "flops": 12339628826496 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 576 ], "batch_size": 8, "flops": 11390426614272 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 432 ], "batch_size": 8, "flops": 8542819977600 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 432 ], "batch_size": 8, "flops": 8542819977600 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 128 ], "batch_size": 8, "flops": 2531205966848 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 544 ], "batch_size": 8, "flops": 10757625139456 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 656 ], "batch_size": 8, "flops": 12972430301312 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 128 ], "batch_size": 8, "flops": 2531205966848 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 464 ], "batch_size": 8, "flops": 9175621452416 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 560 ], "batch_size": 8, "flops": 11074025876864 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 576 ], "batch_size": 8, "flops": 11390426614272 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 1168 ], "batch_size": 8, "flops": 23097253898368 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 640 ], "batch_size": 8, "flops": 12656029563904 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 6, 208 ], "batch_size": 8, "flops": 4113209653888 } ], "timestamp": "2025-09-10 02:18:29.776063", "step": 1176, "epoch": 1 }, { "type": "pplx", "content": 16784163.124731667, "timestamp": "2025-09-10 02:18:29.779308", "step": 1176, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:18:29.810633", "step": 1176, "epoch": 1 }, { "type": "loss", "content": 0.017028305679559708, "timestamp": "2025-09-10 02:18:29.818914", "step": 1177, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:18:29.850203", "step": 1177, "epoch": 1 }, { "type": "loss", "content": 0.03579110652208328, "timestamp": "2025-09-10 02:18:29.854250", "step": 1178, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:18:29.885761", "step": 1178, "epoch": 1 }, { "type": "loss", "content": 0.012345547787845135, "timestamp": "2025-09-10 02:18:29.893048", "step": 1179, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 144 ], "flops": 4271696270016 }, "timestamp": "2025-09-10 02:18:29.923823", "step": 1179, "epoch": 1 }, { "type": "loss", "content": 0.014899312518537045, "timestamp": "2025-09-10 02:18:29.947959", "step": 1180, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:18:29.978721", "step": 1180, "epoch": 1 }, { "type": "loss", "content": 0.008222085423767567, "timestamp": "2025-09-10 02:18:29.980848", "step": 1181, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 512 ], "flops": 15187581968384 }, "timestamp": "2025-09-10 02:18:30.023345", "step": 1181, "epoch": 1 }, { "type": "loss", "content": 0.0018380869878455997, "timestamp": "2025-09-10 02:18:30.041100", "step": 1182, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:18:30.072675", "step": 1182, "epoch": 1 }, { "type": "loss", "content": 0.01529020071029663, "timestamp": "2025-09-10 02:18:30.083507", "step": 1183, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:18:30.115357", "step": 1183, "epoch": 1 }, { "type": "loss", "content": 0.0014780040364712477, "timestamp": "2025-09-10 02:18:30.139939", "step": 1184, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:18:30.171392", "step": 1184, "epoch": 1 }, { "type": "loss", "content": 0.05387040600180626, "timestamp": "2025-09-10 02:18:30.175793", "step": 1185, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:18:30.206809", "step": 1185, "epoch": 1 }, { "type": "loss", "content": 0.022361472249031067, "timestamp": "2025-09-10 02:18:30.213526", "step": 1186, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:18:30.246344", "step": 1186, "epoch": 1 }, { "type": "loss", "content": 0.004187957849353552, "timestamp": "2025-09-10 02:18:30.257187", "step": 1187, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:18:30.288529", "step": 1187, "epoch": 1 }, { "type": "loss", "content": 0.00469655217602849, "timestamp": "2025-09-10 02:18:30.311975", "step": 1188, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:18:30.343483", "step": 1188, "epoch": 1 }, { "type": "loss", "content": 0.013641082681715488, "timestamp": "2025-09-10 02:18:30.347869", "step": 1189, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:18:30.379141", "step": 1189, "epoch": 1 }, { "type": "loss", "content": 0.024424701929092407, "timestamp": "2025-09-10 02:18:30.386124", "step": 1190, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:18:30.417664", "step": 1190, "epoch": 1 }, { "type": "loss", "content": 0.01394572388380766, "timestamp": "2025-09-10 02:18:30.429702", "step": 1191, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:18:30.460633", "step": 1191, "epoch": 1 }, { "type": "loss", "content": 0.011202634312212467, "timestamp": "2025-09-10 02:18:30.488313", "step": 1192, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:18:30.520611", "step": 1192, "epoch": 1 }, { "type": "loss", "content": 0.00934526789933443, "timestamp": "2025-09-10 02:18:30.533275", "step": 1193, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:18:30.564516", "step": 1193, "epoch": 1 }, { "type": "loss", "content": 0.013034219853579998, "timestamp": "2025-09-10 02:18:30.572148", "step": 1194, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 624 ], "flops": 18509808050496 }, "timestamp": "2025-09-10 02:18:30.626335", "step": 1194, "epoch": 1 }, { "type": "loss", "content": 0.016260338947176933, "timestamp": "2025-09-10 02:18:30.648043", "step": 1195, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:18:30.679582", "step": 1195, "epoch": 1 }, { "type": "loss", "content": 0.01205496210604906, "timestamp": "2025-09-10 02:18:30.711057", "step": 1196, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:18:30.742251", "step": 1196, "epoch": 1 }, { "type": "loss", "content": 0.008653457276523113, "timestamp": "2025-09-10 02:18:30.747482", "step": 1197, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-10 02:18:30.785544", "step": 1197, "epoch": 1 }, { "type": "loss", "content": 0.017093000933527946, "timestamp": "2025-09-10 02:18:30.801131", "step": 1198, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:18:30.833495", "step": 1198, "epoch": 1 }, { "type": "loss", "content": 0.004972951021045446, "timestamp": "2025-09-10 02:18:30.840111", "step": 1199, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:18:30.871337", "step": 1199, "epoch": 1 }, { "type": "loss", "content": 0.027272850275039673, "timestamp": "2025-09-10 02:18:30.896443", "step": 1200, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:18:30.929351", "step": 1200, "epoch": 1 }, { "type": "loss", "content": 0.011625121347606182, "timestamp": "2025-09-10 02:18:30.942023", "step": 1201, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 464 ], "flops": 13763770790336 }, "timestamp": "2025-09-10 02:18:30.983449", "step": 1201, "epoch": 1 }, { "type": "loss", "content": 0.03317030146718025, "timestamp": "2025-09-10 02:18:31.000458", "step": 1202, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-10 02:18:31.036444", "step": 1202, "epoch": 1 }, { "type": "loss", "content": 0.002671575639396906, "timestamp": "2025-09-10 02:18:31.050463", "step": 1203, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:18:31.083564", "step": 1203, "epoch": 1 }, { "type": "loss", "content": 0.009755785576999187, "timestamp": "2025-09-10 02:18:31.111385", "step": 1204, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:18:31.149839", "step": 1204, "epoch": 1 }, { "type": "loss", "content": 0.03215007483959198, "timestamp": "2025-09-10 02:18:31.159052", "step": 1205, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:18:31.196525", "step": 1205, "epoch": 1 }, { "type": "loss", "content": 0.010887703858315945, "timestamp": "2025-09-10 02:18:31.207286", "step": 1206, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:18:31.251369", "step": 1206, "epoch": 1 }, { "type": "loss", "content": 0.020919183269143105, "timestamp": "2025-09-10 02:18:31.261938", "step": 1207, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:18:31.299237", "step": 1207, "epoch": 1 }, { "type": "loss", "content": 0.04709470644593239, "timestamp": "2025-09-10 02:18:31.332133", "step": 1208, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:18:31.374807", "step": 1208, "epoch": 1 }, { "type": "loss", "content": 0.01061093620955944, "timestamp": "2025-09-10 02:18:31.378490", "step": 1209, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:18:31.415608", "step": 1209, "epoch": 1 }, { "type": "loss", "content": 0.029514219611883163, "timestamp": "2025-09-10 02:18:31.422753", "step": 1210, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:18:31.455106", "step": 1210, "epoch": 1 }, { "type": "loss", "content": 0.012318803928792477, "timestamp": "2025-09-10 02:18:31.465102", "step": 1211, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:18:31.497093", "step": 1211, "epoch": 1 }, { "type": "loss", "content": 0.018550723791122437, "timestamp": "2025-09-10 02:18:31.524402", "step": 1212, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:18:31.557803", "step": 1212, "epoch": 1 }, { "type": "loss", "content": 0.008835774846374989, "timestamp": "2025-09-10 02:18:31.562204", "step": 1213, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:18:31.595282", "step": 1213, "epoch": 1 }, { "type": "loss", "content": 0.01745608262717724, "timestamp": "2025-09-10 02:18:31.601476", "step": 1214, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:18:31.633237", "step": 1214, "epoch": 1 }, { "type": "loss", "content": 0.011802353896200657, "timestamp": "2025-09-10 02:18:31.642797", "step": 1215, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:18:31.674542", "step": 1215, "epoch": 1 }, { "type": "loss", "content": 0.01431284286081791, "timestamp": "2025-09-10 02:18:31.705475", "step": 1216, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:18:31.737086", "step": 1216, "epoch": 1 }, { "type": "loss", "content": 0.025099601596593857, "timestamp": "2025-09-10 02:18:31.739302", "step": 1217, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:18:31.770797", "step": 1217, "epoch": 1 }, { "type": "loss", "content": 0.02076675556600094, "timestamp": "2025-09-10 02:18:31.778171", "step": 1218, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:18:31.809728", "step": 1218, "epoch": 1 }, { "type": "loss", "content": 0.01054247748106718, "timestamp": "2025-09-10 02:18:31.817146", "step": 1219, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:18:31.848780", "step": 1219, "epoch": 1 }, { "type": "loss", "content": 0.00928487628698349, "timestamp": "2025-09-10 02:18:31.877107", "step": 1220, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:18:31.907984", "step": 1220, "epoch": 1 }, { "type": "loss", "content": 0.022254247218370438, "timestamp": "2025-09-10 02:18:31.910448", "step": 1221, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:18:31.941402", "step": 1221, "epoch": 1 }, { "type": "loss", "content": 0.011745232157409191, "timestamp": "2025-09-10 02:18:31.945587", "step": 1222, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:18:31.976636", "step": 1222, "epoch": 1 }, { "type": "loss", "content": 0.02063934877514839, "timestamp": "2025-09-10 02:18:31.984337", "step": 1223, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:18:32.016405", "step": 1223, "epoch": 1 }, { "type": "loss", "content": 0.039252448827028275, "timestamp": "2025-09-10 02:18:32.049530", "step": 1224, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:18:32.082554", "step": 1224, "epoch": 1 }, { "type": "loss", "content": 0.012943130917847157, "timestamp": "2025-09-10 02:18:32.084596", "step": 1225, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:18:32.116154", "step": 1225, "epoch": 1 }, { "type": "loss", "content": 0.02003873698413372, "timestamp": "2025-09-10 02:18:32.128135", "step": 1226, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:18:32.159589", "step": 1226, "epoch": 1 }, { "type": "loss", "content": 0.02976025640964508, "timestamp": "2025-09-10 02:18:32.164024", "step": 1227, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:18:32.194749", "step": 1227, "epoch": 1 }, { "type": "loss", "content": 0.012167098931968212, "timestamp": "2025-09-10 02:18:32.220046", "step": 1228, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:18:32.251374", "step": 1228, "epoch": 1 }, { "type": "loss", "content": 0.021341700106859207, "timestamp": "2025-09-10 02:18:32.259213", "step": 1229, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:18:32.289904", "step": 1229, "epoch": 1 }, { "type": "loss", "content": 0.005283652804791927, "timestamp": "2025-09-10 02:18:32.297233", "step": 1230, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:18:32.329083", "step": 1230, "epoch": 1 }, { "type": "loss", "content": 0.01109201367944479, "timestamp": "2025-09-10 02:18:32.336101", "step": 1231, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:18:32.367104", "step": 1231, "epoch": 1 }, { "type": "loss", "content": 0.023132245987653732, "timestamp": "2025-09-10 02:18:32.400218", "step": 1232, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:18:32.430711", "step": 1232, "epoch": 1 }, { "type": "loss", "content": 0.010825731791555882, "timestamp": "2025-09-10 02:18:32.435551", "step": 1233, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:18:32.466094", "step": 1233, "epoch": 1 }, { "type": "loss", "content": 0.02231140062212944, "timestamp": "2025-09-10 02:18:32.470249", "step": 1234, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:18:32.502763", "step": 1234, "epoch": 1 }, { "type": "loss", "content": 0.029144972562789917, "timestamp": "2025-09-10 02:18:32.508228", "step": 1235, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:18:32.539563", "step": 1235, "epoch": 1 }, { "type": "loss", "content": 0.005374426953494549, "timestamp": "2025-09-10 02:18:32.567470", "step": 1236, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:18:32.605255", "step": 1236, "epoch": 1 }, { "type": "loss", "content": 0.02825375273823738, "timestamp": "2025-09-10 02:18:32.612149", "step": 1237, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:18:32.645025", "step": 1237, "epoch": 1 }, { "type": "loss", "content": 0.020113468170166016, "timestamp": "2025-09-10 02:18:32.657017", "step": 1238, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:18:32.690869", "step": 1238, "epoch": 1 }, { "type": "loss", "content": 0.018397843465209007, "timestamp": "2025-09-10 02:18:32.701867", "step": 1239, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:18:32.735593", "step": 1239, "epoch": 1 }, { "type": "loss", "content": 0.009672732092440128, "timestamp": "2025-09-10 02:18:32.763428", "step": 1240, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:18:32.794477", "step": 1240, "epoch": 1 }, { "type": "loss", "content": 0.015301401726901531, "timestamp": "2025-09-10 02:18:32.799729", "step": 1241, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:18:32.832384", "step": 1241, "epoch": 1 }, { "type": "loss", "content": 0.003928063903003931, "timestamp": "2025-09-10 02:18:32.839501", "step": 1242, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:18:32.871701", "step": 1242, "epoch": 1 }, { "type": "loss", "content": 0.011773375794291496, "timestamp": "2025-09-10 02:18:32.878621", "step": 1243, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:18:32.918003", "step": 1243, "epoch": 1 }, { "type": "loss", "content": 0.0038322594482451677, "timestamp": "2025-09-10 02:18:32.946709", "step": 1244, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:18:32.978578", "step": 1244, "epoch": 1 }, { "type": "loss", "content": 0.049418624490499496, "timestamp": "2025-09-10 02:18:32.983159", "step": 1245, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:18:33.013761", "step": 1245, "epoch": 1 }, { "type": "loss", "content": 0.026037881150841713, "timestamp": "2025-09-10 02:18:33.020631", "step": 1246, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:18:33.051791", "step": 1246, "epoch": 1 }, { "type": "loss", "content": 0.028841393068432808, "timestamp": "2025-09-10 02:18:33.058811", "step": 1247, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:18:33.091678", "step": 1247, "epoch": 1 }, { "type": "loss", "content": 0.00434449827298522, "timestamp": "2025-09-10 02:18:33.120341", "step": 1248, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:18:33.151789", "step": 1248, "epoch": 1 }, { "type": "loss", "content": 0.0594901405274868, "timestamp": "2025-09-10 02:18:33.154354", "step": 1249, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-10 02:18:33.193377", "step": 1249, "epoch": 1 }, { "type": "loss", "content": 0.02595318667590618, "timestamp": "2025-09-10 02:18:33.209070", "step": 1250, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:18:33.241081", "step": 1250, "epoch": 1 }, { "type": "loss", "content": 0.009637218900024891, "timestamp": "2025-09-10 02:18:33.251101", "step": 1251, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:18:33.282406", "step": 1251, "epoch": 1 }, { "type": "loss", "content": 0.020436033606529236, "timestamp": "2025-09-10 02:18:33.310081", "step": 1252, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:18:33.342006", "step": 1252, "epoch": 1 }, { "type": "loss", "content": 0.014274738729000092, "timestamp": "2025-09-10 02:18:33.351794", "step": 1253, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:18:33.383906", "step": 1253, "epoch": 1 }, { "type": "loss", "content": 0.01211103331297636, "timestamp": "2025-09-10 02:18:33.393822", "step": 1254, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:18:33.427460", "step": 1254, "epoch": 1 }, { "type": "loss", "content": 0.0014368664706125855, "timestamp": "2025-09-10 02:18:33.434463", "step": 1255, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:18:33.465462", "step": 1255, "epoch": 1 }, { "type": "loss", "content": 0.024746278300881386, "timestamp": "2025-09-10 02:18:33.493327", "step": 1256, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:18:33.524549", "step": 1256, "epoch": 1 }, { "type": "loss", "content": 0.008130094036459923, "timestamp": "2025-09-10 02:18:33.529833", "step": 1257, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:18:33.561516", "step": 1257, "epoch": 1 }, { "type": "loss", "content": 0.024972526356577873, "timestamp": "2025-09-10 02:18:33.565954", "step": 1258, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:18:33.596831", "step": 1258, "epoch": 1 }, { "type": "loss", "content": 0.027556994929909706, "timestamp": "2025-09-10 02:18:33.601277", "step": 1259, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:18:33.632346", "step": 1259, "epoch": 1 }, { "type": "loss", "content": 0.018490461632609367, "timestamp": "2025-09-10 02:18:33.661045", "step": 1260, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:18:33.695309", "step": 1260, "epoch": 1 }, { "type": "loss", "content": 0.009816362522542477, "timestamp": "2025-09-10 02:18:33.708052", "step": 1261, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-10 02:18:33.750489", "step": 1261, "epoch": 1 }, { "type": "loss", "content": 0.016465116292238235, "timestamp": "2025-09-10 02:18:33.766332", "step": 1262, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:18:33.799401", "step": 1262, "epoch": 1 }, { "type": "loss", "content": 0.001814844785258174, "timestamp": "2025-09-10 02:18:33.810308", "step": 1263, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:18:33.845201", "step": 1263, "epoch": 1 }, { "type": "loss", "content": 0.013563080690801144, "timestamp": "2025-09-10 02:18:33.873063", "step": 1264, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:18:33.908938", "step": 1264, "epoch": 1 }, { "type": "loss", "content": 0.016335798427462578, "timestamp": "2025-09-10 02:18:33.913348", "step": 1265, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:18:33.946699", "step": 1265, "epoch": 1 }, { "type": "loss", "content": 0.01673940010368824, "timestamp": "2025-09-10 02:18:33.957355", "step": 1266, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:18:33.991526", "step": 1266, "epoch": 1 }, { "type": "loss", "content": 0.033870987594127655, "timestamp": "2025-09-10 02:18:34.004095", "step": 1267, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:18:34.038372", "step": 1267, "epoch": 1 }, { "type": "loss", "content": 0.005386251490563154, "timestamp": "2025-09-10 02:18:34.069241", "step": 1268, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:18:34.107804", "step": 1268, "epoch": 1 }, { "type": "loss", "content": 0.030843589454889297, "timestamp": "2025-09-10 02:18:34.117801", "step": 1269, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:18:34.151381", "step": 1269, "epoch": 1 }, { "type": "loss", "content": 0.00869796983897686, "timestamp": "2025-09-10 02:18:34.158066", "step": 1270, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:18:34.189862", "step": 1270, "epoch": 1 }, { "type": "loss", "content": 0.046526242047548294, "timestamp": "2025-09-10 02:18:34.200281", "step": 1271, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 512 ], "flops": 15187581968384 }, "timestamp": "2025-09-10 02:18:34.245614", "step": 1271, "epoch": 1 }, { "type": "loss", "content": 0.011616252362728119, "timestamp": "2025-09-10 02:18:34.284123", "step": 1272, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-10 02:18:34.323074", "step": 1272, "epoch": 1 }, { "type": "loss", "content": 0.007240879815071821, "timestamp": "2025-09-10 02:18:34.338246", "step": 1273, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:18:34.372144", "step": 1273, "epoch": 1 }, { "type": "loss", "content": 0.010705935768783092, "timestamp": "2025-09-10 02:18:34.376353", "step": 1274, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:18:34.407558", "step": 1274, "epoch": 1 }, { "type": "loss", "content": 0.004706122912466526, "timestamp": "2025-09-10 02:18:34.414541", "step": 1275, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:18:34.448868", "step": 1275, "epoch": 1 }, { "type": "loss", "content": 0.01904475688934326, "timestamp": "2025-09-10 02:18:34.473491", "step": 1276, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:18:34.509351", "step": 1276, "epoch": 1 }, { "type": "loss", "content": 0.004496569279581308, "timestamp": "2025-09-10 02:18:34.513801", "step": 1277, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:18:34.554885", "step": 1277, "epoch": 1 }, { "type": "loss", "content": 0.03280925750732422, "timestamp": "2025-09-10 02:18:34.565678", "step": 1278, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:18:34.602218", "step": 1278, "epoch": 1 }, { "type": "loss", "content": 0.005936585366725922, "timestamp": "2025-09-10 02:18:34.609147", "step": 1279, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:18:34.640172", "step": 1279, "epoch": 1 }, { "type": "loss", "content": 0.007575146853923798, "timestamp": "2025-09-10 02:18:34.665352", "step": 1280, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 144 ], "flops": 4271696270016 }, "timestamp": "2025-09-10 02:18:34.696244", "step": 1280, "epoch": 1 }, { "type": "loss", "content": 0.012064780108630657, "timestamp": "2025-09-10 02:18:34.699504", "step": 1281, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:18:34.732312", "step": 1281, "epoch": 1 }, { "type": "loss", "content": 0.012344618327915668, "timestamp": "2025-09-10 02:18:34.742146", "step": 1282, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-10 02:18:34.778396", "step": 1282, "epoch": 1 }, { "type": "loss", "content": 0.025485141202807426, "timestamp": "2025-09-10 02:18:34.792184", "step": 1283, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:18:34.827800", "step": 1283, "epoch": 1 }, { "type": "loss", "content": 0.005437423940747976, "timestamp": "2025-09-10 02:18:34.855789", "step": 1284, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:18:34.887426", "step": 1284, "epoch": 1 }, { "type": "loss", "content": 0.023022016510367393, "timestamp": "2025-09-10 02:18:34.891764", "step": 1285, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:18:34.929544", "step": 1285, "epoch": 1 }, { "type": "loss", "content": 0.003953091334551573, "timestamp": "2025-09-10 02:18:34.941312", "step": 1286, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:18:34.971935", "step": 1286, "epoch": 1 }, { "type": "loss", "content": 0.005520283244550228, "timestamp": "2025-09-10 02:18:34.983936", "step": 1287, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:18:35.017990", "step": 1287, "epoch": 1 }, { "type": "loss", "content": 0.014434975571930408, "timestamp": "2025-09-10 02:18:35.051393", "step": 1288, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:18:35.084410", "step": 1288, "epoch": 1 }, { "type": "loss", "content": 0.011467205360531807, "timestamp": "2025-09-10 02:18:35.096911", "step": 1289, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:18:35.127795", "step": 1289, "epoch": 1 }, { "type": "loss", "content": 0.020779237151145935, "timestamp": "2025-09-10 02:18:35.134525", "step": 1290, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:18:35.168761", "step": 1290, "epoch": 1 }, { "type": "loss", "content": 0.0018716433551162481, "timestamp": "2025-09-10 02:18:35.175865", "step": 1291, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:18:35.206686", "step": 1291, "epoch": 1 }, { "type": "loss", "content": 0.0036745467223227024, "timestamp": "2025-09-10 02:18:35.231575", "step": 1292, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:18:35.262541", "step": 1292, "epoch": 1 }, { "type": "loss", "content": 0.016196925193071365, "timestamp": "2025-09-10 02:18:35.270330", "step": 1293, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:18:35.301030", "step": 1293, "epoch": 1 }, { "type": "loss", "content": 0.0035647223703563213, "timestamp": "2025-09-10 02:18:35.311839", "step": 1294, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:18:35.343584", "step": 1294, "epoch": 1 }, { "type": "loss", "content": 0.010751097463071346, "timestamp": "2025-09-10 02:18:35.346429", "step": 1295, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:18:35.377073", "step": 1295, "epoch": 1 }, { "type": "loss", "content": 0.0385432243347168, "timestamp": "2025-09-10 02:18:35.405578", "step": 1296, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-10 02:18:35.438840", "step": 1296, "epoch": 1 }, { "type": "loss", "content": 0.026758210733532906, "timestamp": "2025-09-10 02:18:35.451961", "step": 1297, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:18:35.484987", "step": 1297, "epoch": 1 }, { "type": "loss", "content": 0.01798836700618267, "timestamp": "2025-09-10 02:18:35.495753", "step": 1298, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:18:35.529550", "step": 1298, "epoch": 1 }, { "type": "loss", "content": 0.01102465484291315, "timestamp": "2025-09-10 02:18:35.533877", "step": 1299, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:18:35.564153", "step": 1299, "epoch": 1 }, { "type": "loss", "content": 0.011853739619255066, "timestamp": "2025-09-10 02:18:35.589056", "step": 1300, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:18:35.620634", "step": 1300, "epoch": 1 }, { "type": "loss", "content": 0.03611797094345093, "timestamp": "2025-09-10 02:18:35.630245", "step": 1301, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 464 ], "flops": 13763770790336 }, "timestamp": "2025-09-10 02:18:35.670917", "step": 1301, "epoch": 1 }, { "type": "loss", "content": 0.0027977568097412586, "timestamp": "2025-09-10 02:18:35.688020", "step": 1302, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:18:35.719854", "step": 1302, "epoch": 1 }, { "type": "loss", "content": 0.009604268707334995, "timestamp": "2025-09-10 02:18:35.726938", "step": 1303, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:18:35.758003", "step": 1303, "epoch": 1 }, { "type": "loss", "content": 0.002295356709510088, "timestamp": "2025-09-10 02:18:35.785829", "step": 1304, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-10 02:18:35.822533", "step": 1304, "epoch": 1 }, { "type": "loss", "content": 0.008270000107586384, "timestamp": "2025-09-10 02:18:35.837973", "step": 1305, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 144 ], "flops": 4271696270016 }, "timestamp": "2025-09-10 02:18:35.872182", "step": 1305, "epoch": 1 }, { "type": "loss", "content": 0.01986978016793728, "timestamp": "2025-09-10 02:18:35.875338", "step": 1306, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:18:35.911087", "step": 1306, "epoch": 1 }, { "type": "loss", "content": 0.023896988481283188, "timestamp": "2025-09-10 02:18:35.921057", "step": 1307, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:18:35.957355", "step": 1307, "epoch": 1 }, { "type": "loss", "content": 0.020381931215524673, "timestamp": "2025-09-10 02:18:35.991669", "step": 1308, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:18:36.026271", "step": 1308, "epoch": 1 }, { "type": "loss", "content": 0.014767967164516449, "timestamp": "2025-09-10 02:18:36.031507", "step": 1309, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:18:36.065762", "step": 1309, "epoch": 1 }, { "type": "loss", "content": 0.002422439632937312, "timestamp": "2025-09-10 02:18:36.070149", "step": 1310, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:18:36.100810", "step": 1310, "epoch": 1 }, { "type": "loss", "content": 0.024649931117892265, "timestamp": "2025-09-10 02:18:36.108445", "step": 1311, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:18:36.141430", "step": 1311, "epoch": 1 }, { "type": "loss", "content": 0.009912949986755848, "timestamp": "2025-09-10 02:18:36.172465", "step": 1312, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:18:36.203763", "step": 1312, "epoch": 1 }, { "type": "loss", "content": 0.002744142198935151, "timestamp": "2025-09-10 02:18:36.212204", "step": 1313, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:18:36.243525", "step": 1313, "epoch": 1 }, { "type": "loss", "content": 0.007335959933698177, "timestamp": "2025-09-10 02:18:36.255726", "step": 1314, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:18:36.287489", "step": 1314, "epoch": 1 }, { "type": "loss", "content": 0.041926268488168716, "timestamp": "2025-09-10 02:18:36.294324", "step": 1315, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:18:36.326188", "step": 1315, "epoch": 1 }, { "type": "loss", "content": 0.0036907510366290808, "timestamp": "2025-09-10 02:18:36.357722", "step": 1316, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-10 02:18:36.391305", "step": 1316, "epoch": 1 }, { "type": "loss", "content": 0.00216344790533185, "timestamp": "2025-09-10 02:18:36.404473", "step": 1317, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 432 ], "flops": 12814563338304 }, "timestamp": "2025-09-10 02:18:36.445720", "step": 1317, "epoch": 1 }, { "type": "loss", "content": 0.009386607445776463, "timestamp": "2025-09-10 02:18:36.461893", "step": 1318, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:18:36.494105", "step": 1318, "epoch": 1 }, { "type": "loss", "content": 0.030109494924545288, "timestamp": "2025-09-10 02:18:36.501386", "step": 1319, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:18:36.533395", "step": 1319, "epoch": 1 }, { "type": "loss", "content": 0.0012107326183468103, "timestamp": "2025-09-10 02:18:36.566829", "step": 1320, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:18:36.598400", "step": 1320, "epoch": 1 }, { "type": "loss", "content": 0.008304606191813946, "timestamp": "2025-09-10 02:18:36.602975", "step": 1321, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:18:36.634882", "step": 1321, "epoch": 1 }, { "type": "loss", "content": 0.022128764539957047, "timestamp": "2025-09-10 02:18:36.642598", "step": 1322, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:18:36.673862", "step": 1322, "epoch": 1 }, { "type": "loss", "content": 0.009982970543205738, "timestamp": "2025-09-10 02:18:36.686427", "step": 1323, "epoch": 1 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 736 ], "batch_size": 8, "flops": 14554433988352 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 480 ], "batch_size": 8, "flops": 9492022189824 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 544 ], "batch_size": 8, "flops": 10757625139456 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 624 ], "batch_size": 8, "flops": 12339628826496 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 576 ], "batch_size": 8, "flops": 11390426614272 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 432 ], "batch_size": 8, "flops": 8542819977600 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 432 ], "batch_size": 8, "flops": 8542819977600 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 128 ], "batch_size": 8, "flops": 2531205966848 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 544 ], "batch_size": 8, "flops": 10757625139456 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 656 ], "batch_size": 8, "flops": 12972430301312 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 128 ], "batch_size": 8, "flops": 2531205966848 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 464 ], "batch_size": 8, "flops": 9175621452416 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 560 ], "batch_size": 8, "flops": 11074025876864 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 576 ], "batch_size": 8, "flops": 11390426614272 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 1168 ], "batch_size": 8, "flops": 23097253898368 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 640 ], "batch_size": 8, "flops": 12656029563904 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 6, 208 ], "batch_size": 8, "flops": 4113209653888 } ], "timestamp": "2025-09-10 02:18:46.851890", "step": 1323, "epoch": 1 }, { "type": "pplx", "content": 13954997.402758988, "timestamp": "2025-09-10 02:18:46.854605", "step": 1323, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:18:46.886451", "step": 1323, "epoch": 1 }, { "type": "loss", "content": 0.018629444763064384, "timestamp": "2025-09-10 02:18:46.920587", "step": 1324, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:18:46.952098", "step": 1324, "epoch": 1 }, { "type": "loss", "content": 0.015395854599773884, "timestamp": "2025-09-10 02:18:46.960873", "step": 1325, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:18:46.992652", "step": 1325, "epoch": 1 }, { "type": "loss", "content": 0.0029722540639340878, "timestamp": "2025-09-10 02:18:47.002965", "step": 1326, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:18:47.036788", "step": 1326, "epoch": 1 }, { "type": "loss", "content": 0.0171508826315403, "timestamp": "2025-09-10 02:18:47.050107", "step": 1327, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:18:47.081821", "step": 1327, "epoch": 1 }, { "type": "loss", "content": 0.004396271891891956, "timestamp": "2025-09-10 02:18:47.110082", "step": 1328, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:18:47.141128", "step": 1328, "epoch": 1 }, { "type": "loss", "content": 0.03813646361231804, "timestamp": "2025-09-10 02:18:47.145678", "step": 1329, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:18:47.177598", "step": 1329, "epoch": 1 }, { "type": "loss", "content": 0.005988952703773975, "timestamp": "2025-09-10 02:18:47.189873", "step": 1330, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:18:47.224977", "step": 1330, "epoch": 1 }, { "type": "loss", "content": 0.03351500257849693, "timestamp": "2025-09-10 02:18:47.238370", "step": 1331, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:18:47.271603", "step": 1331, "epoch": 1 }, { "type": "loss", "content": 0.003736252663657069, "timestamp": "2025-09-10 02:18:47.296535", "step": 1332, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:18:47.328532", "step": 1332, "epoch": 1 }, { "type": "loss", "content": 0.03064594976603985, "timestamp": "2025-09-10 02:18:47.337440", "step": 1333, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:18:47.369734", "step": 1333, "epoch": 1 }, { "type": "loss", "content": 0.02222239412367344, "timestamp": "2025-09-10 02:18:47.380141", "step": 1334, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:18:47.416111", "step": 1334, "epoch": 1 }, { "type": "loss", "content": 0.026622384786605835, "timestamp": "2025-09-10 02:18:47.429721", "step": 1335, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:18:47.464492", "step": 1335, "epoch": 1 }, { "type": "loss", "content": 0.008432361297309399, "timestamp": "2025-09-10 02:18:47.498715", "step": 1336, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:18:47.531910", "step": 1336, "epoch": 1 }, { "type": "loss", "content": 0.004825720097869635, "timestamp": "2025-09-10 02:18:47.534233", "step": 1337, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:18:47.565732", "step": 1337, "epoch": 1 }, { "type": "loss", "content": 0.004051771480590105, "timestamp": "2025-09-10 02:18:47.572231", "step": 1338, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:18:47.603586", "step": 1338, "epoch": 1 }, { "type": "loss", "content": 0.020291676744818687, "timestamp": "2025-09-10 02:18:47.610210", "step": 1339, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:18:47.642064", "step": 1339, "epoch": 1 }, { "type": "loss", "content": 0.017118671908974648, "timestamp": "2025-09-10 02:18:47.674586", "step": 1340, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:18:47.707026", "step": 1340, "epoch": 1 }, { "type": "loss", "content": 0.0032200440764427185, "timestamp": "2025-09-10 02:18:47.714102", "step": 1341, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:18:47.746221", "step": 1341, "epoch": 1 }, { "type": "loss", "content": 0.0031123815570026636, "timestamp": "2025-09-10 02:18:47.755884", "step": 1342, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:18:47.787649", "step": 1342, "epoch": 1 }, { "type": "loss", "content": 0.007114849053323269, "timestamp": "2025-09-10 02:18:47.794652", "step": 1343, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:18:47.826963", "step": 1343, "epoch": 1 }, { "type": "loss", "content": 0.00877163652330637, "timestamp": "2025-09-10 02:18:47.859375", "step": 1344, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:18:47.890078", "step": 1344, "epoch": 1 }, { "type": "loss", "content": 0.03030526638031006, "timestamp": "2025-09-10 02:18:47.892273", "step": 1345, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:18:47.925807", "step": 1345, "epoch": 1 }, { "type": "loss", "content": 0.005265057552605867, "timestamp": "2025-09-10 02:18:47.936141", "step": 1346, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:18:47.971811", "step": 1346, "epoch": 1 }, { "type": "loss", "content": 0.001451778458431363, "timestamp": "2025-09-10 02:18:47.978351", "step": 1347, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:18:48.009497", "step": 1347, "epoch": 1 }, { "type": "loss", "content": 0.011003616265952587, "timestamp": "2025-09-10 02:18:48.037639", "step": 1348, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:18:48.070245", "step": 1348, "epoch": 1 }, { "type": "loss", "content": 0.00957377441227436, "timestamp": "2025-09-10 02:18:48.077376", "step": 1349, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:18:48.108133", "step": 1349, "epoch": 1 }, { "type": "loss", "content": 0.004885104484856129, "timestamp": "2025-09-10 02:18:48.114983", "step": 1350, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:18:48.148190", "step": 1350, "epoch": 1 }, { "type": "loss", "content": 0.022131670266389847, "timestamp": "2025-09-10 02:18:48.160564", "step": 1351, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-10 02:18:48.197590", "step": 1351, "epoch": 1 }, { "type": "loss", "content": 0.004956061951816082, "timestamp": "2025-09-10 02:18:48.232314", "step": 1352, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:18:48.265558", "step": 1352, "epoch": 1 }, { "type": "loss", "content": 0.018229112029075623, "timestamp": "2025-09-10 02:18:48.275361", "step": 1353, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:18:48.306870", "step": 1353, "epoch": 1 }, { "type": "loss", "content": 0.010464141145348549, "timestamp": "2025-09-10 02:18:48.316533", "step": 1354, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:18:48.347729", "step": 1354, "epoch": 1 }, { "type": "loss", "content": 0.0036923617590218782, "timestamp": "2025-09-10 02:18:48.349819", "step": 1355, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:18:48.381637", "step": 1355, "epoch": 1 }, { "type": "loss", "content": 0.05059584602713585, "timestamp": "2025-09-10 02:18:48.409372", "step": 1356, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-10 02:18:48.441772", "step": 1356, "epoch": 1 }, { "type": "loss", "content": 0.005611395929008722, "timestamp": "2025-09-10 02:18:48.454840", "step": 1357, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:18:48.486090", "step": 1357, "epoch": 1 }, { "type": "loss", "content": 0.00173103844281286, "timestamp": "2025-09-10 02:18:48.489918", "step": 1358, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:18:48.521672", "step": 1358, "epoch": 1 }, { "type": "loss", "content": 0.00872302707284689, "timestamp": "2025-09-10 02:18:48.534199", "step": 1359, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:18:48.565533", "step": 1359, "epoch": 1 }, { "type": "loss", "content": 0.003065047785639763, "timestamp": "2025-09-10 02:18:48.593194", "step": 1360, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:18:48.624533", "step": 1360, "epoch": 1 }, { "type": "loss", "content": 0.007230323273688555, "timestamp": "2025-09-10 02:18:48.631787", "step": 1361, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:18:48.663902", "step": 1361, "epoch": 1 }, { "type": "loss", "content": 0.012289733625948429, "timestamp": "2025-09-10 02:18:48.676196", "step": 1362, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:18:48.711977", "step": 1362, "epoch": 1 }, { "type": "loss", "content": 0.05497897043824196, "timestamp": "2025-09-10 02:18:48.725681", "step": 1363, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:18:48.757658", "step": 1363, "epoch": 1 }, { "type": "loss", "content": 0.002912584925070405, "timestamp": "2025-09-10 02:18:48.788064", "step": 1364, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-10 02:18:48.821183", "step": 1364, "epoch": 1 }, { "type": "loss", "content": 0.0022109579294919968, "timestamp": "2025-09-10 02:18:48.834324", "step": 1365, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:18:48.866771", "step": 1365, "epoch": 1 }, { "type": "loss", "content": 0.018029719591140747, "timestamp": "2025-09-10 02:18:48.874073", "step": 1366, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:18:48.906286", "step": 1366, "epoch": 1 }, { "type": "loss", "content": 0.056414928287267685, "timestamp": "2025-09-10 02:18:48.912869", "step": 1367, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:18:48.945028", "step": 1367, "epoch": 1 }, { "type": "loss", "content": 0.05040454491972923, "timestamp": "2025-09-10 02:18:48.975529", "step": 1368, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:18:49.009208", "step": 1368, "epoch": 1 }, { "type": "loss", "content": 0.01217829529196024, "timestamp": "2025-09-10 02:18:49.013682", "step": 1369, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:18:49.048154", "step": 1369, "epoch": 1 }, { "type": "loss", "content": 0.02744656801223755, "timestamp": "2025-09-10 02:18:49.061502", "step": 1370, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:18:49.096528", "step": 1370, "epoch": 1 }, { "type": "loss", "content": 0.02190292812883854, "timestamp": "2025-09-10 02:18:49.106149", "step": 1371, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:18:49.140047", "step": 1371, "epoch": 1 }, { "type": "loss", "content": 0.03642702102661133, "timestamp": "2025-09-10 02:18:49.174250", "step": 1372, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:18:49.208227", "step": 1372, "epoch": 1 }, { "type": "loss", "content": 0.012048700824379921, "timestamp": "2025-09-10 02:18:49.213934", "step": 1373, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:18:49.255170", "step": 1373, "epoch": 1 }, { "type": "loss", "content": 0.02002662420272827, "timestamp": "2025-09-10 02:18:49.262645", "step": 1374, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:18:49.295812", "step": 1374, "epoch": 1 }, { "type": "loss", "content": 0.012123959138989449, "timestamp": "2025-09-10 02:18:49.303233", "step": 1375, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:18:49.334905", "step": 1375, "epoch": 1 }, { "type": "loss", "content": 0.007124970201402903, "timestamp": "2025-09-10 02:18:49.366230", "step": 1376, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:18:49.399376", "step": 1376, "epoch": 1 }, { "type": "loss", "content": 0.003386021126061678, "timestamp": "2025-09-10 02:18:49.401904", "step": 1377, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:18:49.434940", "step": 1377, "epoch": 1 }, { "type": "loss", "content": 0.0014372080331668258, "timestamp": "2025-09-10 02:18:49.444371", "step": 1378, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:18:49.476646", "step": 1378, "epoch": 1 }, { "type": "loss", "content": 0.03508186340332031, "timestamp": "2025-09-10 02:18:49.482744", "step": 1379, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 624 ], "flops": 18509808050496 }, "timestamp": "2025-09-10 02:18:49.535283", "step": 1379, "epoch": 1 }, { "type": "loss", "content": 0.011475126259028912, "timestamp": "2025-09-10 02:18:49.577910", "step": 1380, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:18:49.614902", "step": 1380, "epoch": 1 }, { "type": "loss", "content": 0.0056556230410933495, "timestamp": "2025-09-10 02:18:49.620965", "step": 1381, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:18:49.657781", "step": 1381, "epoch": 1 }, { "type": "loss", "content": 0.008287766017019749, "timestamp": "2025-09-10 02:18:49.661791", "step": 1382, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 480 ], "flops": 14238374516352 }, "timestamp": "2025-09-10 02:18:49.704027", "step": 1382, "epoch": 1 }, { "type": "loss", "content": 0.02117903158068657, "timestamp": "2025-09-10 02:18:49.721395", "step": 1383, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:18:49.754731", "step": 1383, "epoch": 1 }, { "type": "loss", "content": 0.022697385400533676, "timestamp": "2025-09-10 02:18:49.787804", "step": 1384, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:18:49.824331", "step": 1384, "epoch": 1 }, { "type": "loss", "content": 0.010887114331126213, "timestamp": "2025-09-10 02:18:49.828629", "step": 1385, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:18:49.861534", "step": 1385, "epoch": 1 }, { "type": "loss", "content": 0.012805354781448841, "timestamp": "2025-09-10 02:18:49.865426", "step": 1386, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:18:49.898188", "step": 1386, "epoch": 1 }, { "type": "loss", "content": 0.01245130505412817, "timestamp": "2025-09-10 02:18:49.908448", "step": 1387, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:18:49.939966", "step": 1387, "epoch": 1 }, { "type": "loss", "content": 0.028233621269464493, "timestamp": "2025-09-10 02:18:49.964718", "step": 1388, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:18:49.997212", "step": 1388, "epoch": 1 }, { "type": "loss", "content": 0.02826070412993431, "timestamp": "2025-09-10 02:18:50.001547", "step": 1389, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:18:50.033335", "step": 1389, "epoch": 1 }, { "type": "loss", "content": 0.009805792011320591, "timestamp": "2025-09-10 02:18:50.040028", "step": 1390, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:18:50.071252", "step": 1390, "epoch": 1 }, { "type": "loss", "content": 0.005362308118492365, "timestamp": "2025-09-10 02:18:50.078294", "step": 1391, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:18:50.111180", "step": 1391, "epoch": 1 }, { "type": "loss", "content": 0.013351285830140114, "timestamp": "2025-09-10 02:18:50.135326", "step": 1392, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:18:50.167928", "step": 1392, "epoch": 1 }, { "type": "loss", "content": 0.02243475615978241, "timestamp": "2025-09-10 02:18:50.172362", "step": 1393, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:18:50.210986", "step": 1393, "epoch": 1 }, { "type": "loss", "content": 0.014227988198399544, "timestamp": "2025-09-10 02:18:50.223121", "step": 1394, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-10 02:18:50.266730", "step": 1394, "epoch": 1 }, { "type": "loss", "content": 0.004071381408721209, "timestamp": "2025-09-10 02:18:50.282661", "step": 1395, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:18:50.314994", "step": 1395, "epoch": 1 }, { "type": "loss", "content": 0.008621515706181526, "timestamp": "2025-09-10 02:18:50.345670", "step": 1396, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:18:50.383033", "step": 1396, "epoch": 1 }, { "type": "loss", "content": 0.009817084297537804, "timestamp": "2025-09-10 02:18:50.385956", "step": 1397, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:18:50.419718", "step": 1397, "epoch": 1 }, { "type": "loss", "content": 0.012866640463471413, "timestamp": "2025-09-10 02:18:50.426916", "step": 1398, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:18:50.462427", "step": 1398, "epoch": 1 }, { "type": "loss", "content": 0.004508704878389835, "timestamp": "2025-09-10 02:18:50.466612", "step": 1399, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:18:50.499638", "step": 1399, "epoch": 1 }, { "type": "loss", "content": 0.01872037909924984, "timestamp": "2025-09-10 02:18:50.523484", "step": 1400, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:18:50.556465", "step": 1400, "epoch": 1 }, { "type": "loss", "content": 0.006643320899456739, "timestamp": "2025-09-10 02:18:50.560693", "step": 1401, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:18:50.591810", "step": 1401, "epoch": 1 }, { "type": "loss", "content": 0.007569948676973581, "timestamp": "2025-09-10 02:18:50.599186", "step": 1402, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:18:50.630800", "step": 1402, "epoch": 1 }, { "type": "loss", "content": 0.025758620351552963, "timestamp": "2025-09-10 02:18:50.640631", "step": 1403, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:18:50.671891", "step": 1403, "epoch": 1 }, { "type": "loss", "content": 0.0335959829390049, "timestamp": "2025-09-10 02:18:50.696605", "step": 1404, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 608 ], "flops": 18035204324480 }, "timestamp": "2025-09-10 02:18:50.745995", "step": 1404, "epoch": 1 }, { "type": "loss", "content": 0.030695544555783272, "timestamp": "2025-09-10 02:18:50.767499", "step": 1405, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:18:50.799735", "step": 1405, "epoch": 1 }, { "type": "loss", "content": 0.0049163768999278545, "timestamp": "2025-09-10 02:18:50.804055", "step": 1406, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:18:50.836221", "step": 1406, "epoch": 1 }, { "type": "loss", "content": 0.010724040679633617, "timestamp": "2025-09-10 02:18:50.839963", "step": 1407, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:18:50.871715", "step": 1407, "epoch": 1 }, { "type": "loss", "content": 0.008394693955779076, "timestamp": "2025-09-10 02:18:50.896887", "step": 1408, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:18:50.928718", "step": 1408, "epoch": 1 }, { "type": "loss", "content": 0.026338692754507065, "timestamp": "2025-09-10 02:18:50.933342", "step": 1409, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:18:50.967081", "step": 1409, "epoch": 1 }, { "type": "loss", "content": 0.016750004142522812, "timestamp": "2025-09-10 02:18:50.971288", "step": 1410, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:18:51.003371", "step": 1410, "epoch": 1 }, { "type": "loss", "content": 0.012398646213114262, "timestamp": "2025-09-10 02:18:51.010725", "step": 1411, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:18:51.042668", "step": 1411, "epoch": 1 }, { "type": "loss", "content": 0.0241679884493351, "timestamp": "2025-09-10 02:18:51.067747", "step": 1412, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:18:51.100071", "step": 1412, "epoch": 1 }, { "type": "loss", "content": 0.02775205485522747, "timestamp": "2025-09-10 02:18:51.104368", "step": 1413, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:18:51.136377", "step": 1413, "epoch": 1 }, { "type": "loss", "content": 0.008829674683511257, "timestamp": "2025-09-10 02:18:51.143509", "step": 1414, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:18:51.175624", "step": 1414, "epoch": 1 }, { "type": "loss", "content": 0.015450743958353996, "timestamp": "2025-09-10 02:18:51.182906", "step": 1415, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:18:51.215436", "step": 1415, "epoch": 1 }, { "type": "loss", "content": 0.006386533845216036, "timestamp": "2025-09-10 02:18:51.248305", "step": 1416, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:18:51.280172", "step": 1416, "epoch": 1 }, { "type": "loss", "content": 0.010058706626296043, "timestamp": "2025-09-10 02:18:51.284942", "step": 1417, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:18:51.316164", "step": 1417, "epoch": 1 }, { "type": "loss", "content": 0.03240646421909332, "timestamp": "2025-09-10 02:18:51.322735", "step": 1418, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:18:51.354522", "step": 1418, "epoch": 1 }, { "type": "loss", "content": 0.009293629787862301, "timestamp": "2025-09-10 02:18:51.364958", "step": 1419, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:18:51.397068", "step": 1419, "epoch": 1 }, { "type": "loss", "content": 0.009477603249251842, "timestamp": "2025-09-10 02:18:51.425134", "step": 1420, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:18:51.457027", "step": 1420, "epoch": 1 }, { "type": "loss", "content": 0.0042470647022128105, "timestamp": "2025-09-10 02:18:51.462142", "step": 1421, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:18:51.493350", "step": 1421, "epoch": 1 }, { "type": "loss", "content": 0.028168709948658943, "timestamp": "2025-09-10 02:18:51.500444", "step": 1422, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:18:51.531912", "step": 1422, "epoch": 1 }, { "type": "loss", "content": 0.054098401218652725, "timestamp": "2025-09-10 02:18:51.538707", "step": 1423, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:18:51.569998", "step": 1423, "epoch": 1 }, { "type": "loss", "content": 0.013340512290596962, "timestamp": "2025-09-10 02:18:51.594750", "step": 1424, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:18:51.626137", "step": 1424, "epoch": 1 }, { "type": "loss", "content": 0.001269067986868322, "timestamp": "2025-09-10 02:18:51.631049", "step": 1425, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:18:51.661884", "step": 1425, "epoch": 1 }, { "type": "loss", "content": 0.01629549451172352, "timestamp": "2025-09-10 02:18:51.669352", "step": 1426, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:18:51.700505", "step": 1426, "epoch": 1 }, { "type": "loss", "content": 0.013591033406555653, "timestamp": "2025-09-10 02:18:51.712492", "step": 1427, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:18:51.743458", "step": 1427, "epoch": 1 }, { "type": "loss", "content": 0.01069872546941042, "timestamp": "2025-09-10 02:18:51.772110", "step": 1428, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:18:51.802797", "step": 1428, "epoch": 1 }, { "type": "loss", "content": 0.026248564943671227, "timestamp": "2025-09-10 02:18:51.807208", "step": 1429, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:18:51.838296", "step": 1429, "epoch": 1 }, { "type": "loss", "content": 0.013996967114508152, "timestamp": "2025-09-10 02:18:51.845290", "step": 1430, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 656 ], "flops": 19459015502528 }, "timestamp": "2025-09-10 02:18:51.901325", "step": 1430, "epoch": 1 }, { "type": "loss", "content": 0.01444973610341549, "timestamp": "2025-09-10 02:18:51.924697", "step": 1431, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:18:51.957150", "step": 1431, "epoch": 1 }, { "type": "loss", "content": 0.01952037401497364, "timestamp": "2025-09-10 02:18:51.984564", "step": 1432, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:18:52.015526", "step": 1432, "epoch": 1 }, { "type": "loss", "content": 0.02621072344481945, "timestamp": "2025-09-10 02:18:52.019985", "step": 1433, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:18:52.050722", "step": 1433, "epoch": 1 }, { "type": "loss", "content": 0.019877398386597633, "timestamp": "2025-09-10 02:18:52.055320", "step": 1434, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:18:52.086287", "step": 1434, "epoch": 1 }, { "type": "loss", "content": 0.008985900320112705, "timestamp": "2025-09-10 02:18:52.096891", "step": 1435, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:18:52.128162", "step": 1435, "epoch": 1 }, { "type": "loss", "content": 0.006387191358953714, "timestamp": "2025-09-10 02:18:52.158997", "step": 1436, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:18:52.189148", "step": 1436, "epoch": 1 }, { "type": "loss", "content": 0.008200598880648613, "timestamp": "2025-09-10 02:18:52.192387", "step": 1437, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 144 ], "flops": 4271696270016 }, "timestamp": "2025-09-10 02:18:52.224886", "step": 1437, "epoch": 1 }, { "type": "loss", "content": 0.014574953354895115, "timestamp": "2025-09-10 02:18:52.228614", "step": 1438, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:18:52.265452", "step": 1438, "epoch": 1 }, { "type": "loss", "content": 0.010290967300534248, "timestamp": "2025-09-10 02:18:52.272532", "step": 1439, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:18:52.305412", "step": 1439, "epoch": 1 }, { "type": "loss", "content": 0.009290986694395542, "timestamp": "2025-09-10 02:18:52.336620", "step": 1440, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:18:52.372568", "step": 1440, "epoch": 1 }, { "type": "loss", "content": 0.007599604316055775, "timestamp": "2025-09-10 02:18:52.380911", "step": 1441, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:18:52.415184", "step": 1441, "epoch": 1 }, { "type": "loss", "content": 0.008472729474306107, "timestamp": "2025-09-10 02:18:52.421944", "step": 1442, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:18:52.452994", "step": 1442, "epoch": 1 }, { "type": "loss", "content": 0.01178077794611454, "timestamp": "2025-09-10 02:18:52.460256", "step": 1443, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:18:52.492503", "step": 1443, "epoch": 1 }, { "type": "loss", "content": 0.02008945122361183, "timestamp": "2025-09-10 02:18:52.520123", "step": 1444, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:18:52.553628", "step": 1444, "epoch": 1 }, { "type": "loss", "content": 0.004093306139111519, "timestamp": "2025-09-10 02:18:52.556553", "step": 1445, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:18:52.589207", "step": 1445, "epoch": 1 }, { "type": "loss", "content": 0.007239846047013998, "timestamp": "2025-09-10 02:18:52.595784", "step": 1446, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:18:52.627497", "step": 1446, "epoch": 1 }, { "type": "loss", "content": 0.010106794536113739, "timestamp": "2025-09-10 02:18:52.636740", "step": 1447, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-10 02:18:52.675187", "step": 1447, "epoch": 1 }, { "type": "loss", "content": 0.013031134381890297, "timestamp": "2025-09-10 02:18:52.711997", "step": 1448, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:18:52.753937", "step": 1448, "epoch": 1 }, { "type": "loss", "content": 0.008832174353301525, "timestamp": "2025-09-10 02:18:52.761978", "step": 1449, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:18:52.794971", "step": 1449, "epoch": 1 }, { "type": "loss", "content": 0.021678507328033447, "timestamp": "2025-09-10 02:18:52.804407", "step": 1450, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:18:52.834882", "step": 1450, "epoch": 1 }, { "type": "loss", "content": 0.006038912571966648, "timestamp": "2025-09-10 02:18:52.841917", "step": 1451, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:18:52.872336", "step": 1451, "epoch": 1 }, { "type": "loss", "content": 0.015430964529514313, "timestamp": "2025-09-10 02:18:52.903214", "step": 1452, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:18:52.934017", "step": 1452, "epoch": 1 }, { "type": "loss", "content": 0.015627246350049973, "timestamp": "2025-09-10 02:18:52.939110", "step": 1453, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:18:52.969456", "step": 1453, "epoch": 1 }, { "type": "loss", "content": 0.01219885889440775, "timestamp": "2025-09-10 02:18:52.982005", "step": 1454, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:18:53.013431", "step": 1454, "epoch": 1 }, { "type": "loss", "content": 0.024581179022789, "timestamp": "2025-09-10 02:18:53.024459", "step": 1455, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:18:53.055390", "step": 1455, "epoch": 1 }, { "type": "loss", "content": 0.008804569952189922, "timestamp": "2025-09-10 02:18:53.083745", "step": 1456, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:18:53.115291", "step": 1456, "epoch": 1 }, { "type": "loss", "content": 0.013319587334990501, "timestamp": "2025-09-10 02:18:53.123250", "step": 1457, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:18:53.154585", "step": 1457, "epoch": 1 }, { "type": "loss", "content": 0.013631954789161682, "timestamp": "2025-09-10 02:18:53.161717", "step": 1458, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:18:53.194524", "step": 1458, "epoch": 1 }, { "type": "loss", "content": 0.012589896097779274, "timestamp": "2025-09-10 02:18:53.200982", "step": 1459, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:18:53.232868", "step": 1459, "epoch": 1 }, { "type": "loss", "content": 0.02879754640161991, "timestamp": "2025-09-10 02:18:53.257640", "step": 1460, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:18:53.288577", "step": 1460, "epoch": 1 }, { "type": "loss", "content": 0.008828964084386826, "timestamp": "2025-09-10 02:18:53.293605", "step": 1461, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:18:53.323952", "step": 1461, "epoch": 1 }, { "type": "loss", "content": 0.02157404087483883, "timestamp": "2025-09-10 02:18:53.331730", "step": 1462, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:18:53.362913", "step": 1462, "epoch": 1 }, { "type": "loss", "content": 0.02861526980996132, "timestamp": "2025-09-10 02:18:53.373749", "step": 1463, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:18:53.404535", "step": 1463, "epoch": 1 }, { "type": "loss", "content": 0.0109772440046072, "timestamp": "2025-09-10 02:18:53.430088", "step": 1464, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:18:53.460834", "step": 1464, "epoch": 1 }, { "type": "loss", "content": 0.02909570373594761, "timestamp": "2025-09-10 02:18:53.463058", "step": 1465, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 480 ], "flops": 14238374516352 }, "timestamp": "2025-09-10 02:18:53.504235", "step": 1465, "epoch": 1 }, { "type": "loss", "content": 0.02886452153325081, "timestamp": "2025-09-10 02:18:53.521575", "step": 1466, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-10 02:18:53.560419", "step": 1466, "epoch": 1 }, { "type": "loss", "content": 0.013743521645665169, "timestamp": "2025-09-10 02:18:53.576057", "step": 1467, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:18:53.607609", "step": 1467, "epoch": 1 }, { "type": "loss", "content": 0.0125979483127594, "timestamp": "2025-09-10 02:18:53.635898", "step": 1468, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:18:53.667674", "step": 1468, "epoch": 1 }, { "type": "loss", "content": 0.02716805413365364, "timestamp": "2025-09-10 02:18:53.677321", "step": 1469, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 464 ], "flops": 13763770790336 }, "timestamp": "2025-09-10 02:18:53.718446", "step": 1469, "epoch": 1 }, { "type": "loss", "content": 0.025139151141047478, "timestamp": "2025-09-10 02:18:53.735533", "step": 1470, "epoch": 1 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 736 ], "batch_size": 8, "flops": 14554433988352 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 480 ], "batch_size": 8, "flops": 9492022189824 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 544 ], "batch_size": 8, "flops": 10757625139456 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 624 ], "batch_size": 8, "flops": 12339628826496 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 576 ], "batch_size": 8, "flops": 11390426614272 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 432 ], "batch_size": 8, "flops": 8542819977600 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 432 ], "batch_size": 8, "flops": 8542819977600 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 128 ], "batch_size": 8, "flops": 2531205966848 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 544 ], "batch_size": 8, "flops": 10757625139456 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 656 ], "batch_size": 8, "flops": 12972430301312 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 128 ], "batch_size": 8, "flops": 2531205966848 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 464 ], "batch_size": 8, "flops": 9175621452416 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 560 ], "batch_size": 8, "flops": 11074025876864 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 576 ], "batch_size": 8, "flops": 11390426614272 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 1168 ], "batch_size": 8, "flops": 23097253898368 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 640 ], "batch_size": 8, "flops": 12656029563904 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 6, 208 ], "batch_size": 8, "flops": 4113209653888 } ], "timestamp": "2025-09-10 02:19:03.879290", "step": 1470, "epoch": 1 }, { "type": "pplx", "content": 13626061.914788976, "timestamp": "2025-09-10 02:19:03.882109", "step": 1470, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:19:03.912866", "step": 1470, "epoch": 1 }, { "type": "loss", "content": 0.017740854993462563, "timestamp": "2025-09-10 02:19:03.918795", "step": 1471, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 448 ], "flops": 13289167064320 }, "timestamp": "2025-09-10 02:19:03.958443", "step": 1471, "epoch": 1 }, { "type": "loss", "content": 0.021945033222436905, "timestamp": "2025-09-10 02:19:03.995667", "step": 1472, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:19:04.026813", "step": 1472, "epoch": 1 }, { "type": "loss", "content": 0.003880431642755866, "timestamp": "2025-09-10 02:19:04.031328", "step": 1473, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:19:04.061204", "step": 1473, "epoch": 1 }, { "type": "loss", "content": 0.025182703509926796, "timestamp": "2025-09-10 02:19:04.071981", "step": 1474, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:19:04.104926", "step": 1474, "epoch": 1 }, { "type": "loss", "content": 0.009050360880792141, "timestamp": "2025-09-10 02:19:04.117506", "step": 1475, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:19:04.148809", "step": 1475, "epoch": 1 }, { "type": "loss", "content": 0.020268557593226433, "timestamp": "2025-09-10 02:19:04.181676", "step": 1476, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:19:04.211493", "step": 1476, "epoch": 1 }, { "type": "loss", "content": 0.04309564083814621, "timestamp": "2025-09-10 02:19:04.213697", "step": 1477, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:19:04.243933", "step": 1477, "epoch": 1 }, { "type": "loss", "content": 0.0122428759932518, "timestamp": "2025-09-10 02:19:04.256107", "step": 1478, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:19:04.286555", "step": 1478, "epoch": 1 }, { "type": "loss", "content": 0.021157732233405113, "timestamp": "2025-09-10 02:19:04.293319", "step": 1479, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:19:04.323838", "step": 1479, "epoch": 1 }, { "type": "loss", "content": 0.00686487415805459, "timestamp": "2025-09-10 02:19:04.356900", "step": 1480, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:19:04.387730", "step": 1480, "epoch": 1 }, { "type": "loss", "content": 0.009640970267355442, "timestamp": "2025-09-10 02:19:04.395982", "step": 1481, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:19:04.427699", "step": 1481, "epoch": 1 }, { "type": "loss", "content": 0.0068480512127280235, "timestamp": "2025-09-10 02:19:04.438232", "step": 1482, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:19:04.469817", "step": 1482, "epoch": 1 }, { "type": "loss", "content": 0.002509176731109619, "timestamp": "2025-09-10 02:19:04.482385", "step": 1483, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:19:04.511920", "step": 1483, "epoch": 1 }, { "type": "loss", "content": 0.0024369838647544384, "timestamp": "2025-09-10 02:19:04.539793", "step": 1484, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:19:04.570613", "step": 1484, "epoch": 1 }, { "type": "loss", "content": 0.012398682534694672, "timestamp": "2025-09-10 02:19:04.572544", "step": 1485, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:19:04.603140", "step": 1485, "epoch": 1 }, { "type": "loss", "content": 0.011835220269858837, "timestamp": "2025-09-10 02:19:04.607851", "step": 1486, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:19:04.638014", "step": 1486, "epoch": 1 }, { "type": "loss", "content": 0.004558969754725695, "timestamp": "2025-09-10 02:19:04.641892", "step": 1487, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:19:04.672545", "step": 1487, "epoch": 1 }, { "type": "loss", "content": 0.01352360937744379, "timestamp": "2025-09-10 02:19:04.697856", "step": 1488, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:19:04.728706", "step": 1488, "epoch": 1 }, { "type": "loss", "content": 0.00836183037608862, "timestamp": "2025-09-10 02:19:04.730897", "step": 1489, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-10 02:19:04.765809", "step": 1489, "epoch": 1 }, { "type": "loss", "content": 0.0033379762899130583, "timestamp": "2025-09-10 02:19:04.779844", "step": 1490, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 144 ], "flops": 4271696270016 }, "timestamp": "2025-09-10 02:19:04.811017", "step": 1490, "epoch": 1 }, { "type": "loss", "content": 0.006920557469129562, "timestamp": "2025-09-10 02:19:04.813361", "step": 1491, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:19:04.844166", "step": 1491, "epoch": 1 }, { "type": "loss", "content": 0.018455183133482933, "timestamp": "2025-09-10 02:19:04.872005", "step": 1492, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:19:04.903125", "step": 1492, "epoch": 1 }, { "type": "loss", "content": 0.020488440990447998, "timestamp": "2025-09-10 02:19:04.911072", "step": 1493, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:19:04.941252", "step": 1493, "epoch": 1 }, { "type": "loss", "content": 0.017999647185206413, "timestamp": "2025-09-10 02:19:04.949128", "step": 1494, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:19:04.979967", "step": 1494, "epoch": 1 }, { "type": "loss", "content": 0.02349008433520794, "timestamp": "2025-09-10 02:19:04.983658", "step": 1495, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:19:05.016393", "step": 1495, "epoch": 1 }, { "type": "loss", "content": 0.006956641562283039, "timestamp": "2025-09-10 02:19:05.047311", "step": 1496, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:19:05.077419", "step": 1496, "epoch": 1 }, { "type": "loss", "content": 0.012771239504218102, "timestamp": "2025-09-10 02:19:05.085768", "step": 1497, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:19:05.116904", "step": 1497, "epoch": 1 }, { "type": "loss", "content": 0.028537657111883163, "timestamp": "2025-09-10 02:19:05.124157", "step": 1498, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:19:05.153637", "step": 1498, "epoch": 1 }, { "type": "loss", "content": 0.0020859253127127886, "timestamp": "2025-09-10 02:19:05.156458", "step": 1499, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:19:05.186926", "step": 1499, "epoch": 1 }, { "type": "loss", "content": 0.019206488505005836, "timestamp": "2025-09-10 02:19:05.212161", "step": 1500, "epoch": 1 }, { "type": "info", "content": "Checkpoint saved at step 1500", "timestamp": "2025-09-10 02:19:09.902482", "step": 1500, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:19:09.935075", "step": 1500, "epoch": 1 }, { "type": "loss", "content": 0.010249263606965542, "timestamp": "2025-09-10 02:19:09.938233", "step": 1501, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:19:09.970383", "step": 1501, "epoch": 1 }, { "type": "loss", "content": 0.024660227820277214, "timestamp": "2025-09-10 02:19:09.979634", "step": 1502, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:19:10.014194", "step": 1502, "epoch": 1 }, { "type": "loss", "content": 0.013713826425373554, "timestamp": "2025-09-10 02:19:10.021249", "step": 1503, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 480 ], "flops": 14238374516352 }, "timestamp": "2025-09-10 02:19:10.063827", "step": 1503, "epoch": 1 }, { "type": "loss", "content": 0.006658419966697693, "timestamp": "2025-09-10 02:19:10.102108", "step": 1504, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:19:10.133383", "step": 1504, "epoch": 1 }, { "type": "loss", "content": 0.01818818412721157, "timestamp": "2025-09-10 02:19:10.137431", "step": 1505, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:19:10.169156", "step": 1505, "epoch": 1 }, { "type": "loss", "content": 0.023030122742056847, "timestamp": "2025-09-10 02:19:10.176375", "step": 1506, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:19:10.208117", "step": 1506, "epoch": 1 }, { "type": "loss", "content": 0.018940243870019913, "timestamp": "2025-09-10 02:19:10.212098", "step": 1507, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 432 ], "flops": 12814563338304 }, "timestamp": "2025-09-10 02:19:10.250410", "step": 1507, "epoch": 1 }, { "type": "loss", "content": 0.006142920348793268, "timestamp": "2025-09-10 02:19:10.287451", "step": 1508, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:19:10.319677", "step": 1508, "epoch": 1 }, { "type": "loss", "content": 0.009842773899435997, "timestamp": "2025-09-10 02:19:10.327667", "step": 1509, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:19:10.357940", "step": 1509, "epoch": 1 }, { "type": "loss", "content": 0.018183773383498192, "timestamp": "2025-09-10 02:19:10.365543", "step": 1510, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:19:10.395927", "step": 1510, "epoch": 1 }, { "type": "loss", "content": 0.007604293525218964, "timestamp": "2025-09-10 02:19:10.402947", "step": 1511, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:19:10.433895", "step": 1511, "epoch": 1 }, { "type": "loss", "content": 0.008296381682157516, "timestamp": "2025-09-10 02:19:10.461473", "step": 1512, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:19:10.491791", "step": 1512, "epoch": 1 }, { "type": "loss", "content": 0.004962913691997528, "timestamp": "2025-09-10 02:19:10.496950", "step": 1513, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:19:10.528647", "step": 1513, "epoch": 1 }, { "type": "loss", "content": 0.006845235824584961, "timestamp": "2025-09-10 02:19:10.536012", "step": 1514, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:19:10.566685", "step": 1514, "epoch": 1 }, { "type": "loss", "content": 0.00035365772782824934, "timestamp": "2025-09-10 02:19:10.569706", "step": 1515, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:19:10.600963", "step": 1515, "epoch": 1 }, { "type": "loss", "content": 0.017009198665618896, "timestamp": "2025-09-10 02:19:10.633604", "step": 1516, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:19:10.665336", "step": 1516, "epoch": 1 }, { "type": "loss", "content": 0.011472431942820549, "timestamp": "2025-09-10 02:19:10.669734", "step": 1517, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:19:10.700629", "step": 1517, "epoch": 1 }, { "type": "loss", "content": 0.0018043555319309235, "timestamp": "2025-09-10 02:19:10.710367", "step": 1518, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:19:10.741714", "step": 1518, "epoch": 1 }, { "type": "loss", "content": 0.012901760637760162, "timestamp": "2025-09-10 02:19:10.754161", "step": 1519, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:19:10.786107", "step": 1519, "epoch": 1 }, { "type": "loss", "content": 0.003726641181856394, "timestamp": "2025-09-10 02:19:10.813635", "step": 1520, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:19:10.844031", "step": 1520, "epoch": 1 }, { "type": "loss", "content": 0.004837970249354839, "timestamp": "2025-09-10 02:19:10.848890", "step": 1521, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:19:10.880087", "step": 1521, "epoch": 1 }, { "type": "loss", "content": 0.027267929166555405, "timestamp": "2025-09-10 02:19:10.883803", "step": 1522, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:19:10.914418", "step": 1522, "epoch": 1 }, { "type": "loss", "content": 0.02369379624724388, "timestamp": "2025-09-10 02:19:10.921907", "step": 1523, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:19:10.957545", "step": 1523, "epoch": 1 }, { "type": "loss", "content": 0.0018621442141011357, "timestamp": "2025-09-10 02:19:10.985654", "step": 1524, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:19:11.017915", "step": 1524, "epoch": 1 }, { "type": "loss", "content": 0.01912684179842472, "timestamp": "2025-09-10 02:19:11.022677", "step": 1525, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 144 ], "flops": 4271696270016 }, "timestamp": "2025-09-10 02:19:11.056726", "step": 1525, "epoch": 1 }, { "type": "loss", "content": 0.003869327250868082, "timestamp": "2025-09-10 02:19:11.059031", "step": 1526, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-10 02:19:11.097504", "step": 1526, "epoch": 1 }, { "type": "loss", "content": 0.0017400800716131926, "timestamp": "2025-09-10 02:19:11.113340", "step": 1527, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:19:11.145937", "step": 1527, "epoch": 1 }, { "type": "loss", "content": 0.0031910340767353773, "timestamp": "2025-09-10 02:19:11.178488", "step": 1528, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:19:11.209726", "step": 1528, "epoch": 1 }, { "type": "loss", "content": 0.00998393353074789, "timestamp": "2025-09-10 02:19:11.214348", "step": 1529, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:19:11.245050", "step": 1529, "epoch": 1 }, { "type": "loss", "content": 0.007462826557457447, "timestamp": "2025-09-10 02:19:11.252114", "step": 1530, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:19:11.282679", "step": 1530, "epoch": 1 }, { "type": "loss", "content": 0.009829898364841938, "timestamp": "2025-09-10 02:19:11.293519", "step": 1531, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:19:11.324112", "step": 1531, "epoch": 1 }, { "type": "loss", "content": 0.011728269048035145, "timestamp": "2025-09-10 02:19:11.352538", "step": 1532, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 480 ], "flops": 14238374516352 }, "timestamp": "2025-09-10 02:19:11.391942", "step": 1532, "epoch": 1 }, { "type": "loss", "content": 0.009979200549423695, "timestamp": "2025-09-10 02:19:11.408936", "step": 1533, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:19:11.440144", "step": 1533, "epoch": 1 }, { "type": "loss", "content": 0.009194576181471348, "timestamp": "2025-09-10 02:19:11.450196", "step": 1534, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:19:11.482122", "step": 1534, "epoch": 1 }, { "type": "loss", "content": 0.0038809494581073523, "timestamp": "2025-09-10 02:19:11.485890", "step": 1535, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:19:11.516810", "step": 1535, "epoch": 1 }, { "type": "loss", "content": 0.04445614293217659, "timestamp": "2025-09-10 02:19:11.544585", "step": 1536, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:19:11.575947", "step": 1536, "epoch": 1 }, { "type": "loss", "content": 0.0013619901146739721, "timestamp": "2025-09-10 02:19:11.578278", "step": 1537, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:19:11.609705", "step": 1537, "epoch": 1 }, { "type": "loss", "content": 0.008444820530712605, "timestamp": "2025-09-10 02:19:11.613684", "step": 1538, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:19:11.645949", "step": 1538, "epoch": 1 }, { "type": "loss", "content": 0.010615772567689419, "timestamp": "2025-09-10 02:19:11.655699", "step": 1539, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:19:11.686968", "step": 1539, "epoch": 1 }, { "type": "loss", "content": 0.018414005637168884, "timestamp": "2025-09-10 02:19:11.715405", "step": 1540, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:19:11.746396", "step": 1540, "epoch": 1 }, { "type": "loss", "content": 0.016833599656820297, "timestamp": "2025-09-10 02:19:11.748965", "step": 1541, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:19:11.780055", "step": 1541, "epoch": 1 }, { "type": "loss", "content": 0.021267401054501534, "timestamp": "2025-09-10 02:19:11.790787", "step": 1542, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:19:11.820770", "step": 1542, "epoch": 1 }, { "type": "loss", "content": 0.0016013866988942027, "timestamp": "2025-09-10 02:19:11.827864", "step": 1543, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:19:11.859009", "step": 1543, "epoch": 1 }, { "type": "loss", "content": 0.002547146985307336, "timestamp": "2025-09-10 02:19:11.889785", "step": 1544, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:19:11.923472", "step": 1544, "epoch": 1 }, { "type": "loss", "content": 0.0054414160549640656, "timestamp": "2025-09-10 02:19:11.936361", "step": 1545, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:19:11.967739", "step": 1545, "epoch": 1 }, { "type": "loss", "content": 0.003947163466364145, "timestamp": "2025-09-10 02:19:11.974295", "step": 1546, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:19:12.005187", "step": 1546, "epoch": 1 }, { "type": "loss", "content": 0.011945655569434166, "timestamp": "2025-09-10 02:19:12.009348", "step": 1547, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:19:12.043937", "step": 1547, "epoch": 1 }, { "type": "loss", "content": 0.024208705872297287, "timestamp": "2025-09-10 02:19:12.074259", "step": 1548, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:19:12.106578", "step": 1548, "epoch": 1 }, { "type": "loss", "content": 0.0040494343265891075, "timestamp": "2025-09-10 02:19:12.111391", "step": 1549, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:19:12.143234", "step": 1549, "epoch": 1 }, { "type": "loss", "content": 0.0034531753044575453, "timestamp": "2025-09-10 02:19:12.147564", "step": 1550, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:19:12.178742", "step": 1550, "epoch": 1 }, { "type": "loss", "content": 0.006650130730122328, "timestamp": "2025-09-10 02:19:12.189561", "step": 1551, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:19:12.220244", "step": 1551, "epoch": 1 }, { "type": "loss", "content": 0.015004181303083897, "timestamp": "2025-09-10 02:19:12.245449", "step": 1552, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:19:12.276928", "step": 1552, "epoch": 1 }, { "type": "loss", "content": 0.04160107299685478, "timestamp": "2025-09-10 02:19:12.282194", "step": 1553, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:19:12.317576", "step": 1553, "epoch": 1 }, { "type": "loss", "content": 0.0034997588954865932, "timestamp": "2025-09-10 02:19:12.324532", "step": 1554, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:19:12.361708", "step": 1554, "epoch": 1 }, { "type": "loss", "content": 0.032703883945941925, "timestamp": "2025-09-10 02:19:12.365640", "step": 1555, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:19:12.402175", "step": 1555, "epoch": 1 }, { "type": "loss", "content": 0.0007587299915030599, "timestamp": "2025-09-10 02:19:12.435646", "step": 1556, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:19:12.472372", "step": 1556, "epoch": 1 }, { "type": "loss", "content": 0.01702108420431614, "timestamp": "2025-09-10 02:19:12.477555", "step": 1557, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:19:12.512655", "step": 1557, "epoch": 1 }, { "type": "loss", "content": 0.03824358060956001, "timestamp": "2025-09-10 02:19:12.520053", "step": 1558, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:19:12.554768", "step": 1558, "epoch": 1 }, { "type": "loss", "content": 0.04590751603245735, "timestamp": "2025-09-10 02:19:12.561526", "step": 1559, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:19:12.593528", "step": 1559, "epoch": 1 }, { "type": "loss", "content": 0.0073296381160616875, "timestamp": "2025-09-10 02:19:12.621090", "step": 1560, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:19:12.656359", "step": 1560, "epoch": 1 }, { "type": "loss", "content": 0.00989292562007904, "timestamp": "2025-09-10 02:19:12.665182", "step": 1561, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:19:12.698238", "step": 1561, "epoch": 1 }, { "type": "loss", "content": 0.004150025546550751, "timestamp": "2025-09-10 02:19:12.710416", "step": 1562, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:19:12.743156", "step": 1562, "epoch": 1 }, { "type": "loss", "content": 0.005096559878438711, "timestamp": "2025-09-10 02:19:12.750017", "step": 1563, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:19:12.783012", "step": 1563, "epoch": 1 }, { "type": "loss", "content": 0.010926149785518646, "timestamp": "2025-09-10 02:19:12.808126", "step": 1564, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:19:12.838762", "step": 1564, "epoch": 1 }, { "type": "loss", "content": 0.0010295318206772208, "timestamp": "2025-09-10 02:19:12.840914", "step": 1565, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:19:12.871398", "step": 1565, "epoch": 1 }, { "type": "loss", "content": 0.03598492965102196, "timestamp": "2025-09-10 02:19:12.878101", "step": 1566, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:19:12.908645", "step": 1566, "epoch": 1 }, { "type": "loss", "content": 0.020379869267344475, "timestamp": "2025-09-10 02:19:12.919096", "step": 1567, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:19:12.950808", "step": 1567, "epoch": 1 }, { "type": "loss", "content": 0.014766373671591282, "timestamp": "2025-09-10 02:19:12.979285", "step": 1568, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:19:13.013184", "step": 1568, "epoch": 1 }, { "type": "loss", "content": 0.004263672977685928, "timestamp": "2025-09-10 02:19:13.025928", "step": 1569, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:19:13.059028", "step": 1569, "epoch": 1 }, { "type": "loss", "content": 0.003036454552784562, "timestamp": "2025-09-10 02:19:13.061593", "step": 1570, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:19:13.092133", "step": 1570, "epoch": 1 }, { "type": "loss", "content": 0.02348274551331997, "timestamp": "2025-09-10 02:19:13.099943", "step": 1571, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:19:13.133967", "step": 1571, "epoch": 1 }, { "type": "loss", "content": 0.004423712845891714, "timestamp": "2025-09-10 02:19:13.168482", "step": 1572, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:19:13.201480", "step": 1572, "epoch": 1 }, { "type": "loss", "content": 0.015006057918071747, "timestamp": "2025-09-10 02:19:13.210174", "step": 1573, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:19:13.241404", "step": 1573, "epoch": 1 }, { "type": "loss", "content": 0.01116950623691082, "timestamp": "2025-09-10 02:19:13.245612", "step": 1574, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:19:13.277344", "step": 1574, "epoch": 1 }, { "type": "loss", "content": 0.006449028849601746, "timestamp": "2025-09-10 02:19:13.281195", "step": 1575, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:19:13.312545", "step": 1575, "epoch": 1 }, { "type": "loss", "content": 0.013472805730998516, "timestamp": "2025-09-10 02:19:13.340182", "step": 1576, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:19:13.371388", "step": 1576, "epoch": 1 }, { "type": "loss", "content": 0.043971676379442215, "timestamp": "2025-09-10 02:19:13.373639", "step": 1577, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:19:13.405054", "step": 1577, "epoch": 1 }, { "type": "loss", "content": 0.0046881563030183315, "timestamp": "2025-09-10 02:19:13.411863", "step": 1578, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 144 ], "flops": 4271696270016 }, "timestamp": "2025-09-10 02:19:13.442366", "step": 1578, "epoch": 1 }, { "type": "loss", "content": 0.0024195548612624407, "timestamp": "2025-09-10 02:19:13.444544", "step": 1579, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:19:13.475562", "step": 1579, "epoch": 1 }, { "type": "loss", "content": 0.01876218058168888, "timestamp": "2025-09-10 02:19:13.503355", "step": 1580, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:19:13.535008", "step": 1580, "epoch": 1 }, { "type": "loss", "content": 0.012913152575492859, "timestamp": "2025-09-10 02:19:13.540082", "step": 1581, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:19:13.570603", "step": 1581, "epoch": 1 }, { "type": "loss", "content": 0.0017830540891736746, "timestamp": "2025-09-10 02:19:13.577270", "step": 1582, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:19:13.610802", "step": 1582, "epoch": 1 }, { "type": "loss", "content": 0.05414802208542824, "timestamp": "2025-09-10 02:19:13.622355", "step": 1583, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:19:13.653529", "step": 1583, "epoch": 1 }, { "type": "loss", "content": 0.01702873595058918, "timestamp": "2025-09-10 02:19:13.681039", "step": 1584, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:19:13.712174", "step": 1584, "epoch": 1 }, { "type": "loss", "content": 0.02116026170551777, "timestamp": "2025-09-10 02:19:13.719993", "step": 1585, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:19:13.751245", "step": 1585, "epoch": 1 }, { "type": "loss", "content": 0.006673253607004881, "timestamp": "2025-09-10 02:19:13.763423", "step": 1586, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:19:13.795855", "step": 1586, "epoch": 1 }, { "type": "loss", "content": 0.004170980304479599, "timestamp": "2025-09-10 02:19:13.802709", "step": 1587, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:19:13.835247", "step": 1587, "epoch": 1 }, { "type": "loss", "content": 0.013706117868423462, "timestamp": "2025-09-10 02:19:13.865659", "step": 1588, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:19:13.897377", "step": 1588, "epoch": 1 }, { "type": "loss", "content": 0.0012754879426211119, "timestamp": "2025-09-10 02:19:13.902337", "step": 1589, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:19:13.934244", "step": 1589, "epoch": 1 }, { "type": "loss", "content": 0.002866287948563695, "timestamp": "2025-09-10 02:19:13.945068", "step": 1590, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:19:13.975845", "step": 1590, "epoch": 1 }, { "type": "loss", "content": 0.007324092090129852, "timestamp": "2025-09-10 02:19:13.985907", "step": 1591, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:19:14.017461", "step": 1591, "epoch": 1 }, { "type": "loss", "content": 0.02281094528734684, "timestamp": "2025-09-10 02:19:14.045784", "step": 1592, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 128 ], "flops": 3797092544000 }, "timestamp": "2025-09-10 02:19:14.077329", "step": 1592, "epoch": 1 }, { "type": "loss", "content": 0.005747564602643251, "timestamp": "2025-09-10 02:19:14.079549", "step": 1593, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:19:14.110416", "step": 1593, "epoch": 1 }, { "type": "loss", "content": 0.00610779132694006, "timestamp": "2025-09-10 02:19:14.122081", "step": 1594, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:19:14.153333", "step": 1594, "epoch": 1 }, { "type": "loss", "content": 0.02713647671043873, "timestamp": "2025-09-10 02:19:14.160177", "step": 1595, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:19:14.190485", "step": 1595, "epoch": 1 }, { "type": "loss", "content": 0.023267099633812904, "timestamp": "2025-09-10 02:19:14.214370", "step": 1596, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:19:14.245070", "step": 1596, "epoch": 1 }, { "type": "loss", "content": 0.0023487545549869537, "timestamp": "2025-09-10 02:19:14.252377", "step": 1597, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:19:14.284544", "step": 1597, "epoch": 1 }, { "type": "loss", "content": 0.0011892582988366485, "timestamp": "2025-09-10 02:19:14.294391", "step": 1598, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:19:14.325831", "step": 1598, "epoch": 1 }, { "type": "loss", "content": 0.0046529993414878845, "timestamp": "2025-09-10 02:19:14.333072", "step": 1599, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:19:14.367251", "step": 1599, "epoch": 1 }, { "type": "loss", "content": 0.0007145693525671959, "timestamp": "2025-09-10 02:19:14.401520", "step": 1600, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:19:14.434584", "step": 1600, "epoch": 1 }, { "type": "loss", "content": 0.030808603391051292, "timestamp": "2025-09-10 02:19:14.436496", "step": 1601, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:19:14.468690", "step": 1601, "epoch": 1 }, { "type": "loss", "content": 0.029920728877186775, "timestamp": "2025-09-10 02:19:14.480403", "step": 1602, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:19:14.511771", "step": 1602, "epoch": 1 }, { "type": "loss", "content": 0.010802625678479671, "timestamp": "2025-09-10 02:19:14.519362", "step": 1603, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:19:14.550633", "step": 1603, "epoch": 1 }, { "type": "loss", "content": 0.010367213748395443, "timestamp": "2025-09-10 02:19:14.583769", "step": 1604, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:19:14.614780", "step": 1604, "epoch": 1 }, { "type": "loss", "content": 0.004078761674463749, "timestamp": "2025-09-10 02:19:14.619101", "step": 1605, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:19:14.650662", "step": 1605, "epoch": 1 }, { "type": "loss", "content": 0.01048226747661829, "timestamp": "2025-09-10 02:19:14.657463", "step": 1606, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:19:14.689199", "step": 1606, "epoch": 1 }, { "type": "loss", "content": 0.020741861313581467, "timestamp": "2025-09-10 02:19:14.696470", "step": 1607, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:19:14.727985", "step": 1607, "epoch": 1 }, { "type": "loss", "content": 0.02420172281563282, "timestamp": "2025-09-10 02:19:14.760321", "step": 1608, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:19:14.791127", "step": 1608, "epoch": 1 }, { "type": "loss", "content": 0.011232390999794006, "timestamp": "2025-09-10 02:19:14.793190", "step": 1609, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:19:14.824541", "step": 1609, "epoch": 1 }, { "type": "loss", "content": 0.025039060041308403, "timestamp": "2025-09-10 02:19:14.831385", "step": 1610, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:19:14.862592", "step": 1610, "epoch": 1 }, { "type": "loss", "content": 0.010274732485413551, "timestamp": "2025-09-10 02:19:14.869832", "step": 1611, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:19:14.901580", "step": 1611, "epoch": 1 }, { "type": "loss", "content": 0.007137875538319349, "timestamp": "2025-09-10 02:19:14.929641", "step": 1612, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:19:14.961939", "step": 1612, "epoch": 1 }, { "type": "loss", "content": 0.006099893245846033, "timestamp": "2025-09-10 02:19:14.974929", "step": 1613, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-10 02:19:15.013784", "step": 1613, "epoch": 1 }, { "type": "loss", "content": 0.013428665697574615, "timestamp": "2025-09-10 02:19:15.029699", "step": 1614, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:19:15.061526", "step": 1614, "epoch": 1 }, { "type": "loss", "content": 0.02884194441139698, "timestamp": "2025-09-10 02:19:15.068318", "step": 1615, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:19:15.099983", "step": 1615, "epoch": 1 }, { "type": "loss", "content": 0.03412342816591263, "timestamp": "2025-09-10 02:19:15.130601", "step": 1616, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:19:15.162345", "step": 1616, "epoch": 1 }, { "type": "loss", "content": 0.016644364222884178, "timestamp": "2025-09-10 02:19:15.166419", "step": 1617, "epoch": 1 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 736 ], "batch_size": 8, "flops": 14554433988352 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 480 ], "batch_size": 8, "flops": 9492022189824 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 544 ], "batch_size": 8, "flops": 10757625139456 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 624 ], "batch_size": 8, "flops": 12339628826496 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 576 ], "batch_size": 8, "flops": 11390426614272 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 432 ], "batch_size": 8, "flops": 8542819977600 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 432 ], "batch_size": 8, "flops": 8542819977600 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 128 ], "batch_size": 8, "flops": 2531205966848 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 544 ], "batch_size": 8, "flops": 10757625139456 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 656 ], "batch_size": 8, "flops": 12972430301312 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 128 ], "batch_size": 8, "flops": 2531205966848 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 464 ], "batch_size": 8, "flops": 9175621452416 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 560 ], "batch_size": 8, "flops": 11074025876864 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 576 ], "batch_size": 8, "flops": 11390426614272 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 1168 ], "batch_size": 8, "flops": 23097253898368 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 640 ], "batch_size": 8, "flops": 12656029563904 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 6, 208 ], "batch_size": 8, "flops": 4113209653888 } ], "timestamp": "2025-09-10 02:19:25.242216", "step": 1617, "epoch": 1 }, { "type": "pplx", "content": 14254475.265608242, "timestamp": "2025-09-10 02:19:25.257155", "step": 1617, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:19:25.296379", "step": 1617, "epoch": 1 }, { "type": "loss", "content": 0.0030304626561701298, "timestamp": "2025-09-10 02:19:25.299826", "step": 1618, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:19:25.332126", "step": 1618, "epoch": 1 }, { "type": "loss", "content": 0.0015295592602342367, "timestamp": "2025-09-10 02:19:25.343829", "step": 1619, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:19:25.375724", "step": 1619, "epoch": 1 }, { "type": "loss", "content": 0.010093179531395435, "timestamp": "2025-09-10 02:19:25.403746", "step": 1620, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:19:25.442447", "step": 1620, "epoch": 1 }, { "type": "loss", "content": 0.04103449359536171, "timestamp": "2025-09-10 02:19:25.447594", "step": 1621, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:19:25.480406", "step": 1621, "epoch": 1 }, { "type": "loss", "content": 0.030527640134096146, "timestamp": "2025-09-10 02:19:25.492574", "step": 1622, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:19:25.523519", "step": 1622, "epoch": 1 }, { "type": "loss", "content": 0.03347934037446976, "timestamp": "2025-09-10 02:19:25.531231", "step": 1623, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:19:25.561588", "step": 1623, "epoch": 1 }, { "type": "loss", "content": 0.005834028124809265, "timestamp": "2025-09-10 02:19:25.590191", "step": 1624, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:19:25.623761", "step": 1624, "epoch": 1 }, { "type": "loss", "content": 0.011887645348906517, "timestamp": "2025-09-10 02:19:25.627883", "step": 1625, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:19:25.660861", "step": 1625, "epoch": 1 }, { "type": "loss", "content": 0.020918427035212517, "timestamp": "2025-09-10 02:19:25.671539", "step": 1626, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:19:25.707325", "step": 1626, "epoch": 1 }, { "type": "loss", "content": 0.004566980060189962, "timestamp": "2025-09-10 02:19:25.713071", "step": 1627, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:19:25.744771", "step": 1627, "epoch": 1 }, { "type": "loss", "content": 0.01930670067667961, "timestamp": "2025-09-10 02:19:25.769696", "step": 1628, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:19:25.800237", "step": 1628, "epoch": 1 }, { "type": "loss", "content": 0.01942528784275055, "timestamp": "2025-09-10 02:19:25.804804", "step": 1629, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:19:25.835511", "step": 1629, "epoch": 1 }, { "type": "loss", "content": 0.012651464901864529, "timestamp": "2025-09-10 02:19:25.845581", "step": 1630, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:19:25.885732", "step": 1630, "epoch": 1 }, { "type": "loss", "content": 0.028396448120474815, "timestamp": "2025-09-10 02:19:25.889920", "step": 1631, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:19:25.924490", "step": 1631, "epoch": 1 }, { "type": "loss", "content": 0.010911746881902218, "timestamp": "2025-09-10 02:19:25.955142", "step": 1632, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:19:25.986806", "step": 1632, "epoch": 1 }, { "type": "loss", "content": 0.031062575057148933, "timestamp": "2025-09-10 02:19:25.991159", "step": 1633, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:19:26.025471", "step": 1633, "epoch": 1 }, { "type": "loss", "content": 0.010253122076392174, "timestamp": "2025-09-10 02:19:26.034930", "step": 1634, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:19:26.067458", "step": 1634, "epoch": 1 }, { "type": "loss", "content": 0.015442321076989174, "timestamp": "2025-09-10 02:19:26.071192", "step": 1635, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:19:26.103396", "step": 1635, "epoch": 1 }, { "type": "loss", "content": 0.004601712804287672, "timestamp": "2025-09-10 02:19:26.128461", "step": 1636, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:19:26.169924", "step": 1636, "epoch": 1 }, { "type": "loss", "content": 0.006141430698335171, "timestamp": "2025-09-10 02:19:26.174411", "step": 1637, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:19:26.211983", "step": 1637, "epoch": 1 }, { "type": "loss", "content": 0.004029339645057917, "timestamp": "2025-09-10 02:19:26.215841", "step": 1638, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:19:26.247780", "step": 1638, "epoch": 1 }, { "type": "loss", "content": 0.010412991046905518, "timestamp": "2025-09-10 02:19:26.252030", "step": 1639, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:19:26.283809", "step": 1639, "epoch": 1 }, { "type": "loss", "content": 0.013454841449856758, "timestamp": "2025-09-10 02:19:26.312179", "step": 1640, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:19:26.344145", "step": 1640, "epoch": 1 }, { "type": "loss", "content": 0.02152983471751213, "timestamp": "2025-09-10 02:19:26.351604", "step": 1641, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:19:26.392487", "step": 1641, "epoch": 1 }, { "type": "loss", "content": 0.006329267751425505, "timestamp": "2025-09-10 02:19:26.399247", "step": 1642, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:19:26.437367", "step": 1642, "epoch": 1 }, { "type": "loss", "content": 0.01075258944183588, "timestamp": "2025-09-10 02:19:26.449101", "step": 1643, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:19:26.487476", "step": 1643, "epoch": 1 }, { "type": "loss", "content": 0.0039253514260053635, "timestamp": "2025-09-10 02:19:26.515046", "step": 1644, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:19:26.556068", "step": 1644, "epoch": 1 }, { "type": "loss", "content": 0.012616248801350594, "timestamp": "2025-09-10 02:19:26.560104", "step": 1645, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:19:26.597052", "step": 1645, "epoch": 1 }, { "type": "loss", "content": 0.019528865814208984, "timestamp": "2025-09-10 02:19:26.603995", "step": 1646, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:19:26.638664", "step": 1646, "epoch": 1 }, { "type": "loss", "content": 0.0033076356630772352, "timestamp": "2025-09-10 02:19:26.646191", "step": 1647, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:19:26.681923", "step": 1647, "epoch": 1 }, { "type": "loss", "content": 0.01205162238329649, "timestamp": "2025-09-10 02:19:26.716520", "step": 1648, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:19:26.751236", "step": 1648, "epoch": 1 }, { "type": "loss", "content": 0.020559037104249, "timestamp": "2025-09-10 02:19:26.753290", "step": 1649, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:19:26.784471", "step": 1649, "epoch": 1 }, { "type": "loss", "content": 0.013713809661567211, "timestamp": "2025-09-10 02:19:26.792067", "step": 1650, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:19:26.824240", "step": 1650, "epoch": 1 }, { "type": "loss", "content": 0.026908008381724358, "timestamp": "2025-09-10 02:19:26.831139", "step": 1651, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:19:26.864818", "step": 1651, "epoch": 1 }, { "type": "loss", "content": 0.019599396735429764, "timestamp": "2025-09-10 02:19:26.891705", "step": 1652, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-10 02:19:26.928007", "step": 1652, "epoch": 1 }, { "type": "loss", "content": 0.02248522825539112, "timestamp": "2025-09-10 02:19:26.943137", "step": 1653, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:19:26.976637", "step": 1653, "epoch": 1 }, { "type": "loss", "content": 0.0142592191696167, "timestamp": "2025-09-10 02:19:26.982632", "step": 1654, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:19:27.016068", "step": 1654, "epoch": 1 }, { "type": "loss", "content": 0.020718032494187355, "timestamp": "2025-09-10 02:19:27.022551", "step": 1655, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:19:27.054261", "step": 1655, "epoch": 1 }, { "type": "loss", "content": 0.05304405093193054, "timestamp": "2025-09-10 02:19:27.081700", "step": 1656, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-10 02:19:27.118254", "step": 1656, "epoch": 1 }, { "type": "loss", "content": 0.040047433227300644, "timestamp": "2025-09-10 02:19:27.133381", "step": 1657, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 464 ], "flops": 13763770790336 }, "timestamp": "2025-09-10 02:19:27.174828", "step": 1657, "epoch": 1 }, { "type": "loss", "content": 0.02052774466574192, "timestamp": "2025-09-10 02:19:27.191899", "step": 1658, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:19:27.225404", "step": 1658, "epoch": 1 }, { "type": "loss", "content": 0.013448750600218773, "timestamp": "2025-09-10 02:19:27.231262", "step": 1659, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:19:27.262310", "step": 1659, "epoch": 1 }, { "type": "loss", "content": 0.005621184129267931, "timestamp": "2025-09-10 02:19:27.289514", "step": 1660, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:19:27.320820", "step": 1660, "epoch": 1 }, { "type": "loss", "content": 0.020898720249533653, "timestamp": "2025-09-10 02:19:27.330737", "step": 1661, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:19:27.362638", "step": 1661, "epoch": 1 }, { "type": "loss", "content": 0.017461569979786873, "timestamp": "2025-09-10 02:19:27.375213", "step": 1662, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 496 ], "flops": 14712978242368 }, "timestamp": "2025-09-10 02:19:27.418839", "step": 1662, "epoch": 1 }, { "type": "loss", "content": 0.023927049711346626, "timestamp": "2025-09-10 02:19:27.436385", "step": 1663, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:19:27.472070", "step": 1663, "epoch": 1 }, { "type": "loss", "content": 0.011457578279078007, "timestamp": "2025-09-10 02:19:27.505457", "step": 1664, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:19:27.540203", "step": 1664, "epoch": 1 }, { "type": "loss", "content": 0.0047464510425925255, "timestamp": "2025-09-10 02:19:27.544670", "step": 1665, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:19:27.577596", "step": 1665, "epoch": 1 }, { "type": "loss", "content": 0.03707936406135559, "timestamp": "2025-09-10 02:19:27.582577", "step": 1666, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:19:27.614332", "step": 1666, "epoch": 1 }, { "type": "loss", "content": 0.042561326175928116, "timestamp": "2025-09-10 02:19:27.617702", "step": 1667, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:19:27.650523", "step": 1667, "epoch": 1 }, { "type": "loss", "content": 0.005984437186270952, "timestamp": "2025-09-10 02:19:27.683287", "step": 1668, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:19:27.714055", "step": 1668, "epoch": 1 }, { "type": "loss", "content": 0.011304566636681557, "timestamp": "2025-09-10 02:19:27.722639", "step": 1669, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:19:27.754009", "step": 1669, "epoch": 1 }, { "type": "loss", "content": 0.0555756576359272, "timestamp": "2025-09-10 02:19:27.761052", "step": 1670, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:19:27.791616", "step": 1670, "epoch": 1 }, { "type": "loss", "content": 0.014852997846901417, "timestamp": "2025-09-10 02:19:27.795734", "step": 1671, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:19:27.826214", "step": 1671, "epoch": 1 }, { "type": "loss", "content": 0.000465600925963372, "timestamp": "2025-09-10 02:19:27.853922", "step": 1672, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:19:27.884679", "step": 1672, "epoch": 1 }, { "type": "loss", "content": 0.011479363776743412, "timestamp": "2025-09-10 02:19:27.889997", "step": 1673, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:19:27.921029", "step": 1673, "epoch": 1 }, { "type": "loss", "content": 0.00886636320501566, "timestamp": "2025-09-10 02:19:27.931296", "step": 1674, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:19:27.962835", "step": 1674, "epoch": 1 }, { "type": "loss", "content": 0.033997684717178345, "timestamp": "2025-09-10 02:19:27.969980", "step": 1675, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:19:28.011996", "step": 1675, "epoch": 1 }, { "type": "loss", "content": 0.028899533674120903, "timestamp": "2025-09-10 02:19:28.044941", "step": 1676, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:19:28.075988", "step": 1676, "epoch": 1 }, { "type": "loss", "content": 0.015834344550967216, "timestamp": "2025-09-10 02:19:28.080818", "step": 1677, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:19:28.126402", "step": 1677, "epoch": 1 }, { "type": "loss", "content": 0.0010925616370514035, "timestamp": "2025-09-10 02:19:28.133192", "step": 1678, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:19:28.165787", "step": 1678, "epoch": 1 }, { "type": "loss", "content": 0.011072367429733276, "timestamp": "2025-09-10 02:19:28.176200", "step": 1679, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 432 ], "flops": 12814563338304 }, "timestamp": "2025-09-10 02:19:28.215114", "step": 1679, "epoch": 1 }, { "type": "loss", "content": 0.014479962177574635, "timestamp": "2025-09-10 02:19:28.252184", "step": 1680, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:19:28.283422", "step": 1680, "epoch": 1 }, { "type": "loss", "content": 0.005413788836449385, "timestamp": "2025-09-10 02:19:28.288347", "step": 1681, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-10 02:19:28.323315", "step": 1681, "epoch": 1 }, { "type": "loss", "content": 0.007339499890804291, "timestamp": "2025-09-10 02:19:28.337248", "step": 1682, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:19:28.368533", "step": 1682, "epoch": 1 }, { "type": "loss", "content": 0.010168512351810932, "timestamp": "2025-09-10 02:19:28.373074", "step": 1683, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-10 02:19:28.408455", "step": 1683, "epoch": 1 }, { "type": "loss", "content": 0.007975684478878975, "timestamp": "2025-09-10 02:19:28.443073", "step": 1684, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:19:28.473739", "step": 1684, "epoch": 1 }, { "type": "loss", "content": 0.015236958861351013, "timestamp": "2025-09-10 02:19:28.478172", "step": 1685, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:19:28.508675", "step": 1685, "epoch": 1 }, { "type": "loss", "content": 0.010547908022999763, "timestamp": "2025-09-10 02:19:28.516263", "step": 1686, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:19:28.547696", "step": 1686, "epoch": 1 }, { "type": "loss", "content": 0.0036612115800380707, "timestamp": "2025-09-10 02:19:28.554535", "step": 1687, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:19:28.587864", "step": 1687, "epoch": 1 }, { "type": "loss", "content": 0.02034112438559532, "timestamp": "2025-09-10 02:19:28.622149", "step": 1688, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:19:28.657085", "step": 1688, "epoch": 1 }, { "type": "loss", "content": 0.012231721542775631, "timestamp": "2025-09-10 02:19:28.667076", "step": 1689, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:19:28.701677", "step": 1689, "epoch": 1 }, { "type": "loss", "content": 0.006200658623129129, "timestamp": "2025-09-10 02:19:28.710829", "step": 1690, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:19:28.741882", "step": 1690, "epoch": 1 }, { "type": "loss", "content": 0.011389417573809624, "timestamp": "2025-09-10 02:19:28.746123", "step": 1691, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:19:28.777247", "step": 1691, "epoch": 1 }, { "type": "loss", "content": 0.028033211827278137, "timestamp": "2025-09-10 02:19:28.805545", "step": 1692, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:19:28.837135", "step": 1692, "epoch": 1 }, { "type": "loss", "content": 0.007890121079981327, "timestamp": "2025-09-10 02:19:28.841907", "step": 1693, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:19:28.872512", "step": 1693, "epoch": 1 }, { "type": "loss", "content": 0.03063378855586052, "timestamp": "2025-09-10 02:19:28.875117", "step": 1694, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:19:28.905933", "step": 1694, "epoch": 1 }, { "type": "loss", "content": 0.0022948638070374727, "timestamp": "2025-09-10 02:19:28.916771", "step": 1695, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:19:28.947133", "step": 1695, "epoch": 1 }, { "type": "loss", "content": 0.002465051133185625, "timestamp": "2025-09-10 02:19:28.978206", "step": 1696, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:19:29.008980", "step": 1696, "epoch": 1 }, { "type": "loss", "content": 0.009558682329952717, "timestamp": "2025-09-10 02:19:29.018783", "step": 1697, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:19:29.049904", "step": 1697, "epoch": 1 }, { "type": "loss", "content": 0.016664791852235794, "timestamp": "2025-09-10 02:19:29.057524", "step": 1698, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:19:29.091977", "step": 1698, "epoch": 1 }, { "type": "loss", "content": 0.013536560349166393, "timestamp": "2025-09-10 02:19:29.105628", "step": 1699, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-10 02:19:29.144132", "step": 1699, "epoch": 1 }, { "type": "loss", "content": 0.008019420318305492, "timestamp": "2025-09-10 02:19:29.180893", "step": 1700, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:19:29.210938", "step": 1700, "epoch": 1 }, { "type": "loss", "content": 0.010356190614402294, "timestamp": "2025-09-10 02:19:29.215979", "step": 1701, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:19:29.256203", "step": 1701, "epoch": 1 }, { "type": "loss", "content": 0.016179528087377548, "timestamp": "2025-09-10 02:19:29.260046", "step": 1702, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:19:29.293405", "step": 1702, "epoch": 1 }, { "type": "loss", "content": 0.018332941457629204, "timestamp": "2025-09-10 02:19:29.306772", "step": 1703, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:19:29.338414", "step": 1703, "epoch": 1 }, { "type": "loss", "content": 0.02060030773282051, "timestamp": "2025-09-10 02:19:29.369647", "step": 1704, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:19:29.403513", "step": 1704, "epoch": 1 }, { "type": "loss", "content": 0.0144452890381217, "timestamp": "2025-09-10 02:19:29.411462", "step": 1705, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:19:29.442717", "step": 1705, "epoch": 1 }, { "type": "loss", "content": 0.016921203583478928, "timestamp": "2025-09-10 02:19:29.449999", "step": 1706, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:19:29.480120", "step": 1706, "epoch": 1 }, { "type": "loss", "content": 0.03076860122382641, "timestamp": "2025-09-10 02:19:29.486961", "step": 1707, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:19:29.518046", "step": 1707, "epoch": 1 }, { "type": "loss", "content": 0.010674857534468174, "timestamp": "2025-09-10 02:19:29.551498", "step": 1708, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:19:29.582268", "step": 1708, "epoch": 1 }, { "type": "loss", "content": 0.004811963532119989, "timestamp": "2025-09-10 02:19:29.587283", "step": 1709, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:19:29.617891", "step": 1709, "epoch": 1 }, { "type": "loss", "content": 0.02059813216328621, "timestamp": "2025-09-10 02:19:29.628103", "step": 1710, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:19:29.661190", "step": 1710, "epoch": 1 }, { "type": "loss", "content": 0.04477255791425705, "timestamp": "2025-09-10 02:19:29.668702", "step": 1711, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:19:29.699404", "step": 1711, "epoch": 1 }, { "type": "loss", "content": 0.022556250914931297, "timestamp": "2025-09-10 02:19:29.727279", "step": 1712, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:19:29.760289", "step": 1712, "epoch": 1 }, { "type": "loss", "content": 0.014114036224782467, "timestamp": "2025-09-10 02:19:29.773276", "step": 1713, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:19:29.803259", "step": 1713, "epoch": 1 }, { "type": "loss", "content": 0.02027253620326519, "timestamp": "2025-09-10 02:19:29.810376", "step": 1714, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:19:29.840408", "step": 1714, "epoch": 1 }, { "type": "loss", "content": 0.004980639088898897, "timestamp": "2025-09-10 02:19:29.844370", "step": 1715, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:19:29.877268", "step": 1715, "epoch": 1 }, { "type": "loss", "content": 0.01874137483537197, "timestamp": "2025-09-10 02:19:29.905832", "step": 1716, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:19:29.936597", "step": 1716, "epoch": 1 }, { "type": "loss", "content": 0.00487111508846283, "timestamp": "2025-09-10 02:19:29.938607", "step": 1717, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 528 ], "flops": 15662185694400 }, "timestamp": "2025-09-10 02:19:29.984718", "step": 1717, "epoch": 1 }, { "type": "loss", "content": 0.008420931175351143, "timestamp": "2025-09-10 02:19:30.003909", "step": 1718, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:19:30.034431", "step": 1718, "epoch": 1 }, { "type": "loss", "content": 0.00927684735506773, "timestamp": "2025-09-10 02:19:30.041409", "step": 1719, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:19:30.074103", "step": 1719, "epoch": 1 }, { "type": "loss", "content": 0.00768580287694931, "timestamp": "2025-09-10 02:19:30.107605", "step": 1720, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 128 ], "flops": 3797092544000 }, "timestamp": "2025-09-10 02:19:30.137477", "step": 1720, "epoch": 1 }, { "type": "loss", "content": 0.01326842326670885, "timestamp": "2025-09-10 02:19:30.139625", "step": 1721, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:19:30.171634", "step": 1721, "epoch": 1 }, { "type": "loss", "content": 0.004389611072838306, "timestamp": "2025-09-10 02:19:30.182588", "step": 1722, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:19:30.213265", "step": 1722, "epoch": 1 }, { "type": "loss", "content": 0.020359130576252937, "timestamp": "2025-09-10 02:19:30.217724", "step": 1723, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-10 02:19:30.253023", "step": 1723, "epoch": 1 }, { "type": "loss", "content": 0.019980600103735924, "timestamp": "2025-09-10 02:19:30.287976", "step": 1724, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:19:30.319961", "step": 1724, "epoch": 1 }, { "type": "loss", "content": 0.012974241748452187, "timestamp": "2025-09-10 02:19:30.327257", "step": 1725, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:19:30.359348", "step": 1725, "epoch": 1 }, { "type": "loss", "content": 0.012521286495029926, "timestamp": "2025-09-10 02:19:30.367171", "step": 1726, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:19:30.401011", "step": 1726, "epoch": 1 }, { "type": "loss", "content": 0.01130104623734951, "timestamp": "2025-09-10 02:19:30.414735", "step": 1727, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:19:30.446730", "step": 1727, "epoch": 1 }, { "type": "loss", "content": 0.032924991101026535, "timestamp": "2025-09-10 02:19:30.475325", "step": 1728, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:19:30.507565", "step": 1728, "epoch": 1 }, { "type": "loss", "content": 0.01869148574769497, "timestamp": "2025-09-10 02:19:30.511832", "step": 1729, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:19:30.542177", "step": 1729, "epoch": 1 }, { "type": "loss", "content": 0.003208654234185815, "timestamp": "2025-09-10 02:19:30.544625", "step": 1730, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:19:30.574883", "step": 1730, "epoch": 1 }, { "type": "loss", "content": 0.008543224073946476, "timestamp": "2025-09-10 02:19:30.577567", "step": 1731, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:19:30.608550", "step": 1731, "epoch": 1 }, { "type": "loss", "content": 0.009434954263269901, "timestamp": "2025-09-10 02:19:30.636421", "step": 1732, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-10 02:19:30.673056", "step": 1732, "epoch": 1 }, { "type": "loss", "content": 0.05541825294494629, "timestamp": "2025-09-10 02:19:30.688495", "step": 1733, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:19:30.719019", "step": 1733, "epoch": 1 }, { "type": "loss", "content": 0.0067622484639286995, "timestamp": "2025-09-10 02:19:30.723483", "step": 1734, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:19:30.754703", "step": 1734, "epoch": 1 }, { "type": "loss", "content": 0.015380342490971088, "timestamp": "2025-09-10 02:19:30.762473", "step": 1735, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:19:30.792914", "step": 1735, "epoch": 1 }, { "type": "loss", "content": 0.017134329304099083, "timestamp": "2025-09-10 02:19:30.821094", "step": 1736, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:19:30.851110", "step": 1736, "epoch": 1 }, { "type": "loss", "content": 0.03203447908163071, "timestamp": "2025-09-10 02:19:30.853371", "step": 1737, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:19:30.892062", "step": 1737, "epoch": 1 }, { "type": "loss", "content": 0.003379482077434659, "timestamp": "2025-09-10 02:19:30.898984", "step": 1738, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:19:30.939908", "step": 1738, "epoch": 1 }, { "type": "loss", "content": 0.011437847279012203, "timestamp": "2025-09-10 02:19:30.953293", "step": 1739, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:19:30.985412", "step": 1739, "epoch": 1 }, { "type": "loss", "content": 0.004656031262129545, "timestamp": "2025-09-10 02:19:31.014134", "step": 1740, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:19:31.045209", "step": 1740, "epoch": 1 }, { "type": "loss", "content": 0.003976056352257729, "timestamp": "2025-09-10 02:19:31.050582", "step": 1741, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:19:31.083172", "step": 1741, "epoch": 1 }, { "type": "loss", "content": 0.004918371327221394, "timestamp": "2025-09-10 02:19:31.087586", "step": 1742, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 608 ], "flops": 18035204324480 }, "timestamp": "2025-09-10 02:19:31.141165", "step": 1742, "epoch": 1 }, { "type": "loss", "content": 0.026369964703917503, "timestamp": "2025-09-10 02:19:31.162721", "step": 1743, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:19:31.193380", "step": 1743, "epoch": 1 }, { "type": "loss", "content": 0.0038020031061023474, "timestamp": "2025-09-10 02:19:31.217175", "step": 1744, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:19:31.247831", "step": 1744, "epoch": 1 }, { "type": "loss", "content": 0.0021418523974716663, "timestamp": "2025-09-10 02:19:31.257520", "step": 1745, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:19:31.288992", "step": 1745, "epoch": 1 }, { "type": "loss", "content": 0.00539687043055892, "timestamp": "2025-09-10 02:19:31.292992", "step": 1746, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:19:31.323652", "step": 1746, "epoch": 1 }, { "type": "loss", "content": 0.003791423747316003, "timestamp": "2025-09-10 02:19:31.327975", "step": 1747, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:19:31.358015", "step": 1747, "epoch": 1 }, { "type": "loss", "content": 0.005174871999770403, "timestamp": "2025-09-10 02:19:31.381555", "step": 1748, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:19:31.413188", "step": 1748, "epoch": 1 }, { "type": "loss", "content": 0.009157408960163593, "timestamp": "2025-09-10 02:19:31.415329", "step": 1749, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:19:31.445518", "step": 1749, "epoch": 1 }, { "type": "loss", "content": 0.012222186662256718, "timestamp": "2025-09-10 02:19:31.448319", "step": 1750, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 448 ], "flops": 13289167064320 }, "timestamp": "2025-09-10 02:19:31.487613", "step": 1750, "epoch": 1 }, { "type": "loss", "content": 0.013511120341718197, "timestamp": "2025-09-10 02:19:31.503936", "step": 1751, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:19:31.537947", "step": 1751, "epoch": 1 }, { "type": "loss", "content": 0.018908429890871048, "timestamp": "2025-09-10 02:19:31.572494", "step": 1752, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:19:31.603343", "step": 1752, "epoch": 1 }, { "type": "loss", "content": 0.00666068447753787, "timestamp": "2025-09-10 02:19:31.611216", "step": 1753, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:19:31.642078", "step": 1753, "epoch": 1 }, { "type": "loss", "content": 0.012360441498458385, "timestamp": "2025-09-10 02:19:31.652141", "step": 1754, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:19:31.684148", "step": 1754, "epoch": 1 }, { "type": "loss", "content": 0.003066555829718709, "timestamp": "2025-09-10 02:19:31.691911", "step": 1755, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:19:31.722517", "step": 1755, "epoch": 1 }, { "type": "loss", "content": 0.02608044445514679, "timestamp": "2025-09-10 02:19:31.750820", "step": 1756, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:19:31.781433", "step": 1756, "epoch": 1 }, { "type": "loss", "content": 0.0021821721456944942, "timestamp": "2025-09-10 02:19:31.786006", "step": 1757, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:19:31.817734", "step": 1757, "epoch": 1 }, { "type": "loss", "content": 0.019431469962000847, "timestamp": "2025-09-10 02:19:31.821666", "step": 1758, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:19:31.851856", "step": 1758, "epoch": 1 }, { "type": "loss", "content": 0.008259564638137817, "timestamp": "2025-09-10 02:19:31.856444", "step": 1759, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:19:31.886455", "step": 1759, "epoch": 1 }, { "type": "loss", "content": 0.00771870044991374, "timestamp": "2025-09-10 02:19:31.918190", "step": 1760, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:19:31.949685", "step": 1760, "epoch": 1 }, { "type": "loss", "content": 0.007215961813926697, "timestamp": "2025-09-10 02:19:31.952016", "step": 1761, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:19:31.983238", "step": 1761, "epoch": 1 }, { "type": "loss", "content": 0.0035610010381788015, "timestamp": "2025-09-10 02:19:31.995161", "step": 1762, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:19:32.026241", "step": 1762, "epoch": 1 }, { "type": "loss", "content": 0.011534550227224827, "timestamp": "2025-09-10 02:19:32.033825", "step": 1763, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:19:32.065156", "step": 1763, "epoch": 1 }, { "type": "loss", "content": 0.009490884840488434, "timestamp": "2025-09-10 02:19:32.096112", "step": 1764, "epoch": 1 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 736 ], "batch_size": 8, "flops": 14554433988352 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 480 ], "batch_size": 8, "flops": 9492022189824 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 544 ], "batch_size": 8, "flops": 10757625139456 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 624 ], "batch_size": 8, "flops": 12339628826496 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 576 ], "batch_size": 8, "flops": 11390426614272 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 432 ], "batch_size": 8, "flops": 8542819977600 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 432 ], "batch_size": 8, "flops": 8542819977600 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 128 ], "batch_size": 8, "flops": 2531205966848 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 544 ], "batch_size": 8, "flops": 10757625139456 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 656 ], "batch_size": 8, "flops": 12972430301312 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 128 ], "batch_size": 8, "flops": 2531205966848 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 464 ], "batch_size": 8, "flops": 9175621452416 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 560 ], "batch_size": 8, "flops": 11074025876864 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 576 ], "batch_size": 8, "flops": 11390426614272 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 1168 ], "batch_size": 8, "flops": 23097253898368 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 640 ], "batch_size": 8, "flops": 12656029563904 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 6, 208 ], "batch_size": 8, "flops": 4113209653888 } ], "timestamp": "2025-09-10 02:19:42.252845", "step": 1764, "epoch": 1 }, { "type": "pplx", "content": 13646644.047763163, "timestamp": "2025-09-10 02:19:42.255566", "step": 1764, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:19:42.285893", "step": 1764, "epoch": 1 }, { "type": "loss", "content": 0.009771243669092655, "timestamp": "2025-09-10 02:19:42.288021", "step": 1765, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:19:42.319947", "step": 1765, "epoch": 1 }, { "type": "loss", "content": 0.0313577726483345, "timestamp": "2025-09-10 02:19:42.326528", "step": 1766, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:19:42.358434", "step": 1766, "epoch": 1 }, { "type": "loss", "content": 0.014947721734642982, "timestamp": "2025-09-10 02:19:42.368142", "step": 1767, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:19:42.399916", "step": 1767, "epoch": 1 }, { "type": "loss", "content": 0.006268322933465242, "timestamp": "2025-09-10 02:19:42.424984", "step": 1768, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:19:42.457111", "step": 1768, "epoch": 1 }, { "type": "loss", "content": 0.0015659164637327194, "timestamp": "2025-09-10 02:19:42.461865", "step": 1769, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:19:42.493385", "step": 1769, "epoch": 1 }, { "type": "loss", "content": 0.023305343464016914, "timestamp": "2025-09-10 02:19:42.500141", "step": 1770, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:19:42.533254", "step": 1770, "epoch": 1 }, { "type": "loss", "content": 0.027237599715590477, "timestamp": "2025-09-10 02:19:42.540707", "step": 1771, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:19:42.572132", "step": 1771, "epoch": 1 }, { "type": "loss", "content": 0.0052889627404510975, "timestamp": "2025-09-10 02:19:42.600096", "step": 1772, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:19:42.633366", "step": 1772, "epoch": 1 }, { "type": "loss", "content": 0.008854638785123825, "timestamp": "2025-09-10 02:19:42.642834", "step": 1773, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:19:42.673736", "step": 1773, "epoch": 1 }, { "type": "loss", "content": 0.0037044784985482693, "timestamp": "2025-09-10 02:19:42.680657", "step": 1774, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:19:42.712060", "step": 1774, "epoch": 1 }, { "type": "loss", "content": 0.0077804699540138245, "timestamp": "2025-09-10 02:19:42.719552", "step": 1775, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:19:42.750994", "step": 1775, "epoch": 1 }, { "type": "loss", "content": 0.004913232754915953, "timestamp": "2025-09-10 02:19:42.778581", "step": 1776, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:19:42.811582", "step": 1776, "epoch": 1 }, { "type": "loss", "content": 0.051165949553251266, "timestamp": "2025-09-10 02:19:42.824296", "step": 1777, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:19:42.856365", "step": 1777, "epoch": 1 }, { "type": "loss", "content": 0.004899430554360151, "timestamp": "2025-09-10 02:19:42.867271", "step": 1778, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:19:42.897553", "step": 1778, "epoch": 1 }, { "type": "loss", "content": 0.05997491627931595, "timestamp": "2025-09-10 02:19:42.900109", "step": 1779, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:19:42.931479", "step": 1779, "epoch": 1 }, { "type": "loss", "content": 0.031111031770706177, "timestamp": "2025-09-10 02:19:42.963258", "step": 1780, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-10 02:19:42.996526", "step": 1780, "epoch": 1 }, { "type": "loss", "content": 0.01209025364369154, "timestamp": "2025-09-10 02:19:43.009852", "step": 1781, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:19:43.042551", "step": 1781, "epoch": 1 }, { "type": "loss", "content": 0.004403825383633375, "timestamp": "2025-09-10 02:19:43.053498", "step": 1782, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:19:43.085270", "step": 1782, "epoch": 1 }, { "type": "loss", "content": 0.022716468200087547, "timestamp": "2025-09-10 02:19:43.089510", "step": 1783, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-10 02:19:43.127488", "step": 1783, "epoch": 1 }, { "type": "loss", "content": 0.04864273592829704, "timestamp": "2025-09-10 02:19:43.164051", "step": 1784, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:19:43.195469", "step": 1784, "epoch": 1 }, { "type": "loss", "content": 0.015554594807326794, "timestamp": "2025-09-10 02:19:43.203965", "step": 1785, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:19:43.234101", "step": 1785, "epoch": 1 }, { "type": "loss", "content": 0.005889651831239462, "timestamp": "2025-09-10 02:19:43.236636", "step": 1786, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:19:43.267481", "step": 1786, "epoch": 1 }, { "type": "loss", "content": 0.009219110012054443, "timestamp": "2025-09-10 02:19:43.274228", "step": 1787, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 128 ], "flops": 3797092544000 }, "timestamp": "2025-09-10 02:19:43.305143", "step": 1787, "epoch": 1 }, { "type": "loss", "content": 0.011448384262621403, "timestamp": "2025-09-10 02:19:43.328394", "step": 1788, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:19:43.358848", "step": 1788, "epoch": 1 }, { "type": "loss", "content": 0.006459720432758331, "timestamp": "2025-09-10 02:19:43.363268", "step": 1789, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:19:43.395200", "step": 1789, "epoch": 1 }, { "type": "loss", "content": 0.007149911485612392, "timestamp": "2025-09-10 02:19:43.398808", "step": 1790, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-10 02:19:43.437204", "step": 1790, "epoch": 1 }, { "type": "loss", "content": 0.008804053999483585, "timestamp": "2025-09-10 02:19:43.452810", "step": 1791, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:19:43.484768", "step": 1791, "epoch": 1 }, { "type": "loss", "content": 0.008087508380413055, "timestamp": "2025-09-10 02:19:43.515258", "step": 1792, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-10 02:19:43.549534", "step": 1792, "epoch": 1 }, { "type": "loss", "content": 0.02685811184346676, "timestamp": "2025-09-10 02:19:43.562840", "step": 1793, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:19:43.595065", "step": 1793, "epoch": 1 }, { "type": "loss", "content": 0.007293777074664831, "timestamp": "2025-09-10 02:19:43.602126", "step": 1794, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:19:43.635093", "step": 1794, "epoch": 1 }, { "type": "loss", "content": 0.011456483043730259, "timestamp": "2025-09-10 02:19:43.639291", "step": 1795, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:19:43.672132", "step": 1795, "epoch": 1 }, { "type": "loss", "content": 0.02691769227385521, "timestamp": "2025-09-10 02:19:43.696135", "step": 1796, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:19:43.727428", "step": 1796, "epoch": 1 }, { "type": "loss", "content": 0.0008614645921625197, "timestamp": "2025-09-10 02:19:43.729755", "step": 1797, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:19:43.761349", "step": 1797, "epoch": 1 }, { "type": "loss", "content": 0.01995791494846344, "timestamp": "2025-09-10 02:19:43.768894", "step": 1798, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:19:43.799930", "step": 1798, "epoch": 1 }, { "type": "loss", "content": 0.013489159755408764, "timestamp": "2025-09-10 02:19:43.806777", "step": 1799, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:19:43.837880", "step": 1799, "epoch": 1 }, { "type": "loss", "content": 0.014494777657091618, "timestamp": "2025-09-10 02:19:43.870637", "step": 1800, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:19:43.903188", "step": 1800, "epoch": 1 }, { "type": "loss", "content": 0.02528318762779236, "timestamp": "2025-09-10 02:19:43.915834", "step": 1801, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:19:43.949804", "step": 1801, "epoch": 1 }, { "type": "loss", "content": 0.0043422463349998, "timestamp": "2025-09-10 02:19:43.963175", "step": 1802, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:19:43.994251", "step": 1802, "epoch": 1 }, { "type": "loss", "content": 0.0019023737404495478, "timestamp": "2025-09-10 02:19:43.996710", "step": 1803, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:19:44.027467", "step": 1803, "epoch": 1 }, { "type": "loss", "content": 0.01238183956593275, "timestamp": "2025-09-10 02:19:44.052681", "step": 1804, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:19:44.083781", "step": 1804, "epoch": 1 }, { "type": "loss", "content": 0.01738804019987583, "timestamp": "2025-09-10 02:19:44.086049", "step": 1805, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:19:44.117517", "step": 1805, "epoch": 1 }, { "type": "loss", "content": 0.0023342971689999104, "timestamp": "2025-09-10 02:19:44.125013", "step": 1806, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:19:44.160189", "step": 1806, "epoch": 1 }, { "type": "loss", "content": 0.005149137694388628, "timestamp": "2025-09-10 02:19:44.173597", "step": 1807, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:19:44.204211", "step": 1807, "epoch": 1 }, { "type": "loss", "content": 0.0011267533991485834, "timestamp": "2025-09-10 02:19:44.229782", "step": 1808, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:19:44.261247", "step": 1808, "epoch": 1 }, { "type": "loss", "content": 0.022445213049650192, "timestamp": "2025-09-10 02:19:44.269737", "step": 1809, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:19:44.301470", "step": 1809, "epoch": 1 }, { "type": "loss", "content": 0.034575022757053375, "timestamp": "2025-09-10 02:19:44.311176", "step": 1810, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:19:44.342260", "step": 1810, "epoch": 1 }, { "type": "loss", "content": 0.002625245600938797, "timestamp": "2025-09-10 02:19:44.344711", "step": 1811, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:19:44.376014", "step": 1811, "epoch": 1 }, { "type": "loss", "content": 0.00602992856875062, "timestamp": "2025-09-10 02:19:44.408890", "step": 1812, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:19:44.440634", "step": 1812, "epoch": 1 }, { "type": "loss", "content": 0.003411894431337714, "timestamp": "2025-09-10 02:19:44.444840", "step": 1813, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:19:44.476205", "step": 1813, "epoch": 1 }, { "type": "loss", "content": 0.015310808084905148, "timestamp": "2025-09-10 02:19:44.486556", "step": 1814, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:19:44.517642", "step": 1814, "epoch": 1 }, { "type": "loss", "content": 0.005168873351067305, "timestamp": "2025-09-10 02:19:44.529701", "step": 1815, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 144 ], "flops": 4271696270016 }, "timestamp": "2025-09-10 02:19:44.562139", "step": 1815, "epoch": 1 }, { "type": "loss", "content": 0.008812850341200829, "timestamp": "2025-09-10 02:19:44.585695", "step": 1816, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:19:44.618878", "step": 1816, "epoch": 1 }, { "type": "loss", "content": 0.014714348129928112, "timestamp": "2025-09-10 02:19:44.631876", "step": 1817, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:19:44.664299", "step": 1817, "epoch": 1 }, { "type": "loss", "content": 0.012252910062670708, "timestamp": "2025-09-10 02:19:44.674674", "step": 1818, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:19:44.705225", "step": 1818, "epoch": 1 }, { "type": "loss", "content": 0.03599643334746361, "timestamp": "2025-09-10 02:19:44.712363", "step": 1819, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:19:44.743233", "step": 1819, "epoch": 1 }, { "type": "loss", "content": 0.011372431181371212, "timestamp": "2025-09-10 02:19:44.775022", "step": 1820, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:19:44.806401", "step": 1820, "epoch": 1 }, { "type": "loss", "content": 0.002817036584019661, "timestamp": "2025-09-10 02:19:44.810885", "step": 1821, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-10 02:19:44.845983", "step": 1821, "epoch": 1 }, { "type": "loss", "content": 0.007789141498506069, "timestamp": "2025-09-10 02:19:44.860026", "step": 1822, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:19:44.892057", "step": 1822, "epoch": 1 }, { "type": "loss", "content": 0.02494768425822258, "timestamp": "2025-09-10 02:19:44.898761", "step": 1823, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:19:44.931244", "step": 1823, "epoch": 1 }, { "type": "loss", "content": 0.028473839163780212, "timestamp": "2025-09-10 02:19:44.964020", "step": 1824, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:19:44.996431", "step": 1824, "epoch": 1 }, { "type": "loss", "content": 0.028283346444368362, "timestamp": "2025-09-10 02:19:45.000490", "step": 1825, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:19:45.031640", "step": 1825, "epoch": 1 }, { "type": "loss", "content": 0.03897113725543022, "timestamp": "2025-09-10 02:19:45.039246", "step": 1826, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:19:45.071557", "step": 1826, "epoch": 1 }, { "type": "loss", "content": 0.018188832327723503, "timestamp": "2025-09-10 02:19:45.078189", "step": 1827, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 144 ], "flops": 4271696270016 }, "timestamp": "2025-09-10 02:19:45.109791", "step": 1827, "epoch": 1 }, { "type": "loss", "content": 0.03504600375890732, "timestamp": "2025-09-10 02:19:45.133580", "step": 1828, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:19:45.165973", "step": 1828, "epoch": 1 }, { "type": "loss", "content": 0.01439552940428257, "timestamp": "2025-09-10 02:19:45.170654", "step": 1829, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:19:45.202156", "step": 1829, "epoch": 1 }, { "type": "loss", "content": 0.0034188800491392612, "timestamp": "2025-09-10 02:19:45.212188", "step": 1830, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:19:45.242754", "step": 1830, "epoch": 1 }, { "type": "loss", "content": 0.0018745275447145104, "timestamp": "2025-09-10 02:19:45.249485", "step": 1831, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:19:45.281073", "step": 1831, "epoch": 1 }, { "type": "loss", "content": 0.01610477827489376, "timestamp": "2025-09-10 02:19:45.306446", "step": 1832, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:19:45.337756", "step": 1832, "epoch": 1 }, { "type": "loss", "content": 0.00925888679921627, "timestamp": "2025-09-10 02:19:45.340062", "step": 1833, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:19:45.372125", "step": 1833, "epoch": 1 }, { "type": "loss", "content": 0.019423970952630043, "timestamp": "2025-09-10 02:19:45.379888", "step": 1834, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:19:45.412255", "step": 1834, "epoch": 1 }, { "type": "loss", "content": 0.004022897686809301, "timestamp": "2025-09-10 02:19:45.419864", "step": 1835, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:19:45.451179", "step": 1835, "epoch": 1 }, { "type": "loss", "content": 0.05194368213415146, "timestamp": "2025-09-10 02:19:45.482178", "step": 1836, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:19:45.514061", "step": 1836, "epoch": 1 }, { "type": "loss", "content": 0.02164938487112522, "timestamp": "2025-09-10 02:19:45.518937", "step": 1837, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:19:45.549764", "step": 1837, "epoch": 1 }, { "type": "loss", "content": 0.006102901417762041, "timestamp": "2025-09-10 02:19:45.561812", "step": 1838, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:19:45.592699", "step": 1838, "epoch": 1 }, { "type": "loss", "content": 0.029408836737275124, "timestamp": "2025-09-10 02:19:45.599380", "step": 1839, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:19:45.631255", "step": 1839, "epoch": 1 }, { "type": "loss", "content": 0.002608294365927577, "timestamp": "2025-09-10 02:19:45.663366", "step": 1840, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:19:45.695272", "step": 1840, "epoch": 1 }, { "type": "loss", "content": 0.007881422527134418, "timestamp": "2025-09-10 02:19:45.697541", "step": 1841, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:19:45.728912", "step": 1841, "epoch": 1 }, { "type": "loss", "content": 0.0055758110247552395, "timestamp": "2025-09-10 02:19:45.736321", "step": 1842, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:19:45.767790", "step": 1842, "epoch": 1 }, { "type": "loss", "content": 0.010343975387513638, "timestamp": "2025-09-10 02:19:45.775320", "step": 1843, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:19:45.806451", "step": 1843, "epoch": 1 }, { "type": "loss", "content": 0.011013594456017017, "timestamp": "2025-09-10 02:19:45.835059", "step": 1844, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:19:45.866731", "step": 1844, "epoch": 1 }, { "type": "loss", "content": 0.014858272857964039, "timestamp": "2025-09-10 02:19:45.871140", "step": 1845, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:19:45.903377", "step": 1845, "epoch": 1 }, { "type": "loss", "content": 0.016053643077611923, "timestamp": "2025-09-10 02:19:45.910764", "step": 1846, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:19:45.943260", "step": 1846, "epoch": 1 }, { "type": "loss", "content": 0.0070062256418168545, "timestamp": "2025-09-10 02:19:45.953665", "step": 1847, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:19:45.989305", "step": 1847, "epoch": 1 }, { "type": "loss", "content": 0.005651051644235849, "timestamp": "2025-09-10 02:19:46.023820", "step": 1848, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:19:46.055949", "step": 1848, "epoch": 1 }, { "type": "loss", "content": 0.022787367925047874, "timestamp": "2025-09-10 02:19:46.060181", "step": 1849, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:19:46.090933", "step": 1849, "epoch": 1 }, { "type": "loss", "content": 0.016836825758218765, "timestamp": "2025-09-10 02:19:46.098375", "step": 1850, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:19:46.128908", "step": 1850, "epoch": 1 }, { "type": "loss", "content": 0.022327521815896034, "timestamp": "2025-09-10 02:19:46.136274", "step": 1851, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-10 02:19:46.174803", "step": 1851, "epoch": 1 }, { "type": "loss", "content": 0.016063082963228226, "timestamp": "2025-09-10 02:19:46.211563", "step": 1852, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 848 ], "flops": 25154260214720 }, "timestamp": "2025-09-10 02:19:46.280708", "step": 1852, "epoch": 1 }, { "type": "loss", "content": 0.0024673263542354107, "timestamp": "2025-09-10 02:19:46.310219", "step": 1853, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:19:46.346698", "step": 1853, "epoch": 1 }, { "type": "loss", "content": 0.004926327615976334, "timestamp": "2025-09-10 02:19:46.359274", "step": 1854, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:19:46.391250", "step": 1854, "epoch": 1 }, { "type": "loss", "content": 0.011958016082644463, "timestamp": "2025-09-10 02:19:46.395459", "step": 1855, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:19:46.427033", "step": 1855, "epoch": 1 }, { "type": "loss", "content": 0.007356296759098768, "timestamp": "2025-09-10 02:19:46.452303", "step": 1856, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:19:46.483238", "step": 1856, "epoch": 1 }, { "type": "loss", "content": 0.010786294937133789, "timestamp": "2025-09-10 02:19:46.491004", "step": 1857, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:19:46.522074", "step": 1857, "epoch": 1 }, { "type": "loss", "content": 0.013343775644898415, "timestamp": "2025-09-10 02:19:46.528769", "step": 1858, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:19:46.563654", "step": 1858, "epoch": 1 }, { "type": "loss", "content": 0.022976329550147057, "timestamp": "2025-09-10 02:19:46.577360", "step": 1859, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-10 02:19:46.612402", "step": 1859, "epoch": 1 }, { "type": "loss", "content": 0.023406516760587692, "timestamp": "2025-09-10 02:19:46.647250", "step": 1860, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:19:46.677341", "step": 1860, "epoch": 1 }, { "type": "loss", "content": 0.009630659595131874, "timestamp": "2025-09-10 02:19:46.679555", "step": 1861, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 512 ], "flops": 15187581968384 }, "timestamp": "2025-09-10 02:19:46.721859", "step": 1861, "epoch": 1 }, { "type": "loss", "content": 0.010469197295606136, "timestamp": "2025-09-10 02:19:46.739610", "step": 1862, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 432 ], "flops": 12814563338304 }, "timestamp": "2025-09-10 02:19:46.779453", "step": 1862, "epoch": 1 }, { "type": "loss", "content": 0.004274784587323666, "timestamp": "2025-09-10 02:19:46.795659", "step": 1863, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:19:46.826679", "step": 1863, "epoch": 1 }, { "type": "loss", "content": 0.020305419340729713, "timestamp": "2025-09-10 02:19:46.851960", "step": 1864, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:19:46.882972", "step": 1864, "epoch": 1 }, { "type": "loss", "content": 0.01911826804280281, "timestamp": "2025-09-10 02:19:46.893468", "step": 1865, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:19:46.924922", "step": 1865, "epoch": 1 }, { "type": "loss", "content": 0.006116253789514303, "timestamp": "2025-09-10 02:19:46.932746", "step": 1866, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:19:46.963830", "step": 1866, "epoch": 1 }, { "type": "loss", "content": 0.03426285460591316, "timestamp": "2025-09-10 02:19:46.974691", "step": 1867, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:19:47.005402", "step": 1867, "epoch": 1 }, { "type": "loss", "content": 0.01511828787624836, "timestamp": "2025-09-10 02:19:47.037027", "step": 1868, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:19:47.067742", "step": 1868, "epoch": 1 }, { "type": "loss", "content": 0.03478477522730827, "timestamp": "2025-09-10 02:19:47.072398", "step": 1869, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:19:47.105643", "step": 1869, "epoch": 1 }, { "type": "loss", "content": 0.0066609373316168785, "timestamp": "2025-09-10 02:19:47.119012", "step": 1870, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:19:47.149847", "step": 1870, "epoch": 1 }, { "type": "loss", "content": 0.009698964655399323, "timestamp": "2025-09-10 02:19:47.156680", "step": 1871, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:19:47.187143", "step": 1871, "epoch": 1 }, { "type": "loss", "content": 0.027642009779810905, "timestamp": "2025-09-10 02:19:47.218357", "step": 1872, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:19:47.248765", "step": 1872, "epoch": 1 }, { "type": "loss", "content": 0.006270275916904211, "timestamp": "2025-09-10 02:19:47.253531", "step": 1873, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 128 ], "flops": 3797092544000 }, "timestamp": "2025-09-10 02:19:47.283393", "step": 1873, "epoch": 1 }, { "type": "loss", "content": 0.0051859780214726925, "timestamp": "2025-09-10 02:19:47.285402", "step": 1874, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-10 02:19:47.323399", "step": 1874, "epoch": 1 }, { "type": "loss", "content": 0.013219809159636497, "timestamp": "2025-09-10 02:19:47.338950", "step": 1875, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:19:47.369922", "step": 1875, "epoch": 1 }, { "type": "loss", "content": 0.00313380965963006, "timestamp": "2025-09-10 02:19:47.395333", "step": 1876, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:19:47.426344", "step": 1876, "epoch": 1 }, { "type": "loss", "content": 0.017517106607556343, "timestamp": "2025-09-10 02:19:47.436727", "step": 1877, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:19:47.468272", "step": 1877, "epoch": 1 }, { "type": "loss", "content": 0.04642176628112793, "timestamp": "2025-09-10 02:19:47.478627", "step": 1878, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:19:47.509259", "step": 1878, "epoch": 1 }, { "type": "loss", "content": 0.015917208045721054, "timestamp": "2025-09-10 02:19:47.516252", "step": 1879, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-10 02:19:47.553668", "step": 1879, "epoch": 1 }, { "type": "loss", "content": 0.01571694202721119, "timestamp": "2025-09-10 02:19:47.590183", "step": 1880, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:19:47.621751", "step": 1880, "epoch": 1 }, { "type": "loss", "content": 0.007821121253073215, "timestamp": "2025-09-10 02:19:47.626878", "step": 1881, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:19:47.658608", "step": 1881, "epoch": 1 }, { "type": "loss", "content": 0.024149566888809204, "timestamp": "2025-09-10 02:19:47.666057", "step": 1882, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:19:47.702178", "step": 1882, "epoch": 1 }, { "type": "loss", "content": 0.00780960638076067, "timestamp": "2025-09-10 02:19:47.715829", "step": 1883, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:19:47.747249", "step": 1883, "epoch": 1 }, { "type": "loss", "content": 0.009634777903556824, "timestamp": "2025-09-10 02:19:47.775606", "step": 1884, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:19:47.806891", "step": 1884, "epoch": 1 }, { "type": "loss", "content": 0.004729505628347397, "timestamp": "2025-09-10 02:19:47.811470", "step": 1885, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:19:47.843086", "step": 1885, "epoch": 1 }, { "type": "loss", "content": 0.028273126110434532, "timestamp": "2025-09-10 02:19:47.855626", "step": 1886, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:19:47.887268", "step": 1886, "epoch": 1 }, { "type": "loss", "content": 0.0056550041772425175, "timestamp": "2025-09-10 02:19:47.894317", "step": 1887, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:19:47.925933", "step": 1887, "epoch": 1 }, { "type": "loss", "content": 0.0050589581951498985, "timestamp": "2025-09-10 02:19:47.954221", "step": 1888, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:19:47.984999", "step": 1888, "epoch": 1 }, { "type": "loss", "content": 0.01905803009867668, "timestamp": "2025-09-10 02:19:47.990440", "step": 1889, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:19:48.023540", "step": 1889, "epoch": 1 }, { "type": "loss", "content": 0.011093123815953732, "timestamp": "2025-09-10 02:19:48.030626", "step": 1890, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:19:48.062235", "step": 1890, "epoch": 1 }, { "type": "loss", "content": 0.006900664884597063, "timestamp": "2025-09-10 02:19:48.070068", "step": 1891, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:19:48.101187", "step": 1891, "epoch": 1 }, { "type": "loss", "content": 0.006212836597114801, "timestamp": "2025-09-10 02:19:48.128957", "step": 1892, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:19:48.160628", "step": 1892, "epoch": 1 }, { "type": "loss", "content": 0.004362097941339016, "timestamp": "2025-09-10 02:19:48.165615", "step": 1893, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 656 ], "flops": 19459015502528 }, "timestamp": "2025-09-10 02:19:48.221178", "step": 1893, "epoch": 1 }, { "type": "loss", "content": 0.012452795170247555, "timestamp": "2025-09-10 02:19:48.244554", "step": 1894, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:19:48.276367", "step": 1894, "epoch": 1 }, { "type": "loss", "content": 0.01033748872578144, "timestamp": "2025-09-10 02:19:48.287260", "step": 1895, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:19:48.318079", "step": 1895, "epoch": 1 }, { "type": "loss", "content": 0.016262022778391838, "timestamp": "2025-09-10 02:19:48.345998", "step": 1896, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-10 02:19:48.382681", "step": 1896, "epoch": 1 }, { "type": "loss", "content": 0.009804087691009045, "timestamp": "2025-09-10 02:19:48.398120", "step": 1897, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:19:48.429228", "step": 1897, "epoch": 1 }, { "type": "loss", "content": 0.0022602048702538013, "timestamp": "2025-09-10 02:19:48.436055", "step": 1898, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:19:48.472753", "step": 1898, "epoch": 1 }, { "type": "loss", "content": 0.0055809845216572285, "timestamp": "2025-09-10 02:19:48.480563", "step": 1899, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:19:48.518880", "step": 1899, "epoch": 1 }, { "type": "loss", "content": 0.007991933263838291, "timestamp": "2025-09-10 02:19:48.551926", "step": 1900, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-10 02:19:48.588105", "step": 1900, "epoch": 1 }, { "type": "loss", "content": 0.006761971395462751, "timestamp": "2025-09-10 02:19:48.601170", "step": 1901, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:19:48.633355", "step": 1901, "epoch": 1 }, { "type": "loss", "content": 0.002031755167990923, "timestamp": "2025-09-10 02:19:48.645582", "step": 1902, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:19:48.678384", "step": 1902, "epoch": 1 }, { "type": "loss", "content": 0.003453353885561228, "timestamp": "2025-09-10 02:19:48.688607", "step": 1903, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:19:48.743530", "step": 1903, "epoch": 1 }, { "type": "loss", "content": 0.004564212169498205, "timestamp": "2025-09-10 02:19:48.778028", "step": 1904, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:19:48.810511", "step": 1904, "epoch": 1 }, { "type": "loss", "content": 0.018804430961608887, "timestamp": "2025-09-10 02:19:48.815823", "step": 1905, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:19:48.847065", "step": 1905, "epoch": 1 }, { "type": "loss", "content": 0.012097448110580444, "timestamp": "2025-09-10 02:19:48.851347", "step": 1906, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:19:48.885945", "step": 1906, "epoch": 1 }, { "type": "loss", "content": 0.003606958081945777, "timestamp": "2025-09-10 02:19:48.898477", "step": 1907, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:19:48.933153", "step": 1907, "epoch": 1 }, { "type": "loss", "content": 0.0061880433931946754, "timestamp": "2025-09-10 02:19:48.964240", "step": 1908, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 144 ], "flops": 4271696270016 }, "timestamp": "2025-09-10 02:19:48.996907", "step": 1908, "epoch": 1 }, { "type": "loss", "content": 0.004639564547687769, "timestamp": "2025-09-10 02:19:48.999361", "step": 1909, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:19:49.030532", "step": 1909, "epoch": 1 }, { "type": "loss", "content": 0.0022573911119252443, "timestamp": "2025-09-10 02:19:49.037363", "step": 1910, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:19:49.068175", "step": 1910, "epoch": 1 }, { "type": "loss", "content": 0.006169704254716635, "timestamp": "2025-09-10 02:19:49.078299", "step": 1911, "epoch": 1 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 736 ], "batch_size": 8, "flops": 14554433988352 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 480 ], "batch_size": 8, "flops": 9492022189824 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 544 ], "batch_size": 8, "flops": 10757625139456 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 624 ], "batch_size": 8, "flops": 12339628826496 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 576 ], "batch_size": 8, "flops": 11390426614272 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 432 ], "batch_size": 8, "flops": 8542819977600 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 432 ], "batch_size": 8, "flops": 8542819977600 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 128 ], "batch_size": 8, "flops": 2531205966848 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 544 ], "batch_size": 8, "flops": 10757625139456 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 656 ], "batch_size": 8, "flops": 12972430301312 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 128 ], "batch_size": 8, "flops": 2531205966848 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 464 ], "batch_size": 8, "flops": 9175621452416 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 560 ], "batch_size": 8, "flops": 11074025876864 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 576 ], "batch_size": 8, "flops": 11390426614272 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 1168 ], "batch_size": 8, "flops": 23097253898368 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 640 ], "batch_size": 8, "flops": 12656029563904 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 6, 208 ], "batch_size": 8, "flops": 4113209653888 } ], "timestamp": "2025-09-10 02:19:59.318357", "step": 1911, "epoch": 1 }, { "type": "pplx", "content": 15748464.88131854, "timestamp": "2025-09-10 02:19:59.321577", "step": 1911, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:19:59.355005", "step": 1911, "epoch": 1 }, { "type": "loss", "content": 0.01734175719320774, "timestamp": "2025-09-10 02:19:59.389250", "step": 1912, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:19:59.431938", "step": 1912, "epoch": 1 }, { "type": "loss", "content": 0.001922283903695643, "timestamp": "2025-09-10 02:19:59.436388", "step": 1913, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:19:59.467171", "step": 1913, "epoch": 1 }, { "type": "loss", "content": 0.028175072744488716, "timestamp": "2025-09-10 02:19:59.470920", "step": 1914, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:19:59.502668", "step": 1914, "epoch": 1 }, { "type": "loss", "content": 0.016676677390933037, "timestamp": "2025-09-10 02:19:59.508746", "step": 1915, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:19:59.544225", "step": 1915, "epoch": 1 }, { "type": "loss", "content": 0.025236140936613083, "timestamp": "2025-09-10 02:19:59.578750", "step": 1916, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:19:59.612425", "step": 1916, "epoch": 1 }, { "type": "loss", "content": 0.012673401273787022, "timestamp": "2025-09-10 02:19:59.614714", "step": 1917, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:19:59.648493", "step": 1917, "epoch": 1 }, { "type": "loss", "content": 0.010366697795689106, "timestamp": "2025-09-10 02:19:59.654277", "step": 1918, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:19:59.686309", "step": 1918, "epoch": 1 }, { "type": "loss", "content": 0.02431515045464039, "timestamp": "2025-09-10 02:19:59.695584", "step": 1919, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:19:59.726935", "step": 1919, "epoch": 1 }, { "type": "loss", "content": 0.014513040892779827, "timestamp": "2025-09-10 02:19:59.754412", "step": 1920, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:19:59.786752", "step": 1920, "epoch": 1 }, { "type": "loss", "content": 0.006609838455915451, "timestamp": "2025-09-10 02:19:59.791424", "step": 1921, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-10 02:19:59.827048", "step": 1921, "epoch": 1 }, { "type": "loss", "content": 0.006101998034864664, "timestamp": "2025-09-10 02:19:59.841049", "step": 1922, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:19:59.873147", "step": 1922, "epoch": 1 }, { "type": "loss", "content": 0.036406856030225754, "timestamp": "2025-09-10 02:19:59.880180", "step": 1923, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:19:59.912547", "step": 1923, "epoch": 1 }, { "type": "loss", "content": 0.034558676183223724, "timestamp": "2025-09-10 02:19:59.937079", "step": 1924, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:19:59.969338", "step": 1924, "epoch": 1 }, { "type": "loss", "content": 0.00515876105055213, "timestamp": "2025-09-10 02:19:59.978570", "step": 1925, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:20:00.013280", "step": 1925, "epoch": 1 }, { "type": "loss", "content": 0.014893017709255219, "timestamp": "2025-09-10 02:20:00.016983", "step": 1926, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:20:00.051875", "step": 1926, "epoch": 1 }, { "type": "loss", "content": 0.015396283939480782, "timestamp": "2025-09-10 02:20:00.056401", "step": 1927, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:20:00.095090", "step": 1927, "epoch": 1 }, { "type": "loss", "content": 0.013009021990001202, "timestamp": "2025-09-10 02:20:00.119808", "step": 1928, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:20:00.155475", "step": 1928, "epoch": 1 }, { "type": "loss", "content": 0.026162832975387573, "timestamp": "2025-09-10 02:20:00.164772", "step": 1929, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:20:00.203437", "step": 1929, "epoch": 1 }, { "type": "loss", "content": 0.01741965487599373, "timestamp": "2025-09-10 02:20:00.215425", "step": 1930, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:20:00.251965", "step": 1930, "epoch": 1 }, { "type": "loss", "content": 0.002141000237315893, "timestamp": "2025-09-10 02:20:00.261627", "step": 1931, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:20:00.303520", "step": 1931, "epoch": 1 }, { "type": "loss", "content": 0.0038426872342824936, "timestamp": "2025-09-10 02:20:00.334654", "step": 1932, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:20:00.374816", "step": 1932, "epoch": 1 }, { "type": "loss", "content": 0.010318142361938953, "timestamp": "2025-09-10 02:20:00.382382", "step": 1933, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:20:00.423403", "step": 1933, "epoch": 1 }, { "type": "loss", "content": 0.026648273691534996, "timestamp": "2025-09-10 02:20:00.433237", "step": 1934, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:20:00.476849", "step": 1934, "epoch": 1 }, { "type": "loss", "content": 0.04163838550448418, "timestamp": "2025-09-10 02:20:00.483471", "step": 1935, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:20:00.516025", "step": 1935, "epoch": 1 }, { "type": "loss", "content": 0.006355960853397846, "timestamp": "2025-09-10 02:20:00.544171", "step": 1936, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:20:00.577147", "step": 1936, "epoch": 1 }, { "type": "loss", "content": 0.002842534566298127, "timestamp": "2025-09-10 02:20:00.586499", "step": 1937, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:20:00.617329", "step": 1937, "epoch": 1 }, { "type": "loss", "content": 0.0015678989002481103, "timestamp": "2025-09-10 02:20:00.629113", "step": 1938, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:20:00.660175", "step": 1938, "epoch": 1 }, { "type": "loss", "content": 0.016521496698260307, "timestamp": "2025-09-10 02:20:00.666949", "step": 1939, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:20:00.699137", "step": 1939, "epoch": 1 }, { "type": "loss", "content": 0.0046984353102743626, "timestamp": "2025-09-10 02:20:00.727048", "step": 1940, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:20:00.758090", "step": 1940, "epoch": 1 }, { "type": "loss", "content": 0.004027045797556639, "timestamp": "2025-09-10 02:20:00.762685", "step": 1941, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:20:00.793799", "step": 1941, "epoch": 1 }, { "type": "loss", "content": 0.02891431376338005, "timestamp": "2025-09-10 02:20:00.800863", "step": 1942, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:20:00.834601", "step": 1942, "epoch": 1 }, { "type": "loss", "content": 0.016804974526166916, "timestamp": "2025-09-10 02:20:00.841617", "step": 1943, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:20:00.873058", "step": 1943, "epoch": 1 }, { "type": "loss", "content": 0.005456176120787859, "timestamp": "2025-09-10 02:20:00.904001", "step": 1944, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:20:00.935753", "step": 1944, "epoch": 1 }, { "type": "loss", "content": 0.002787849633023143, "timestamp": "2025-09-10 02:20:00.940304", "step": 1945, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:20:00.972908", "step": 1945, "epoch": 1 }, { "type": "loss", "content": 0.021894289180636406, "timestamp": "2025-09-10 02:20:00.983091", "step": 1946, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:20:01.014659", "step": 1946, "epoch": 1 }, { "type": "loss", "content": 0.008030838333070278, "timestamp": "2025-09-10 02:20:01.026613", "step": 1947, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:20:01.058472", "step": 1947, "epoch": 1 }, { "type": "loss", "content": 0.029469074681401253, "timestamp": "2025-09-10 02:20:01.086618", "step": 1948, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:20:01.117184", "step": 1948, "epoch": 1 }, { "type": "loss", "content": 0.004061527084559202, "timestamp": "2025-09-10 02:20:01.119788", "step": 1949, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:20:01.152023", "step": 1949, "epoch": 1 }, { "type": "loss", "content": 0.010181749239563942, "timestamp": "2025-09-10 02:20:01.158883", "step": 1950, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:20:01.189424", "step": 1950, "epoch": 1 }, { "type": "loss", "content": 0.021814599633216858, "timestamp": "2025-09-10 02:20:01.193951", "step": 1951, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:20:01.228797", "step": 1951, "epoch": 1 }, { "type": "loss", "content": 0.009718448854982853, "timestamp": "2025-09-10 02:20:01.257497", "step": 1952, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:20:01.289417", "step": 1952, "epoch": 1 }, { "type": "loss", "content": 0.005462608300149441, "timestamp": "2025-09-10 02:20:01.296372", "step": 1953, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:20:01.327704", "step": 1953, "epoch": 1 }, { "type": "loss", "content": 0.002389210741966963, "timestamp": "2025-09-10 02:20:01.334816", "step": 1954, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:20:01.365920", "step": 1954, "epoch": 1 }, { "type": "loss", "content": 0.0015777194639667869, "timestamp": "2025-09-10 02:20:01.375825", "step": 1955, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:20:01.406777", "step": 1955, "epoch": 1 }, { "type": "loss", "content": 0.007745720446109772, "timestamp": "2025-09-10 02:20:01.435074", "step": 1956, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-10 02:20:01.468323", "step": 1956, "epoch": 1 }, { "type": "loss", "content": 0.009010471403598785, "timestamp": "2025-09-10 02:20:01.481413", "step": 1957, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:20:01.514875", "step": 1957, "epoch": 1 }, { "type": "loss", "content": 0.006604184862226248, "timestamp": "2025-09-10 02:20:01.528177", "step": 1958, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:20:01.561394", "step": 1958, "epoch": 1 }, { "type": "loss", "content": 0.0016930067213252187, "timestamp": "2025-09-10 02:20:01.569093", "step": 1959, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:20:01.601187", "step": 1959, "epoch": 1 }, { "type": "loss", "content": 0.016999879851937294, "timestamp": "2025-09-10 02:20:01.633933", "step": 1960, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:20:01.664733", "step": 1960, "epoch": 1 }, { "type": "loss", "content": 0.0018128232331946492, "timestamp": "2025-09-10 02:20:01.669447", "step": 1961, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:20:01.700432", "step": 1961, "epoch": 1 }, { "type": "loss", "content": 0.0029406710527837276, "timestamp": "2025-09-10 02:20:01.708110", "step": 1962, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:20:01.740014", "step": 1962, "epoch": 1 }, { "type": "loss", "content": 0.011884416453540325, "timestamp": "2025-09-10 02:20:01.746627", "step": 1963, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:20:01.777969", "step": 1963, "epoch": 1 }, { "type": "loss", "content": 0.011890656314790249, "timestamp": "2025-09-10 02:20:01.808641", "step": 1964, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:20:01.842975", "step": 1964, "epoch": 1 }, { "type": "loss", "content": 0.012111729942262173, "timestamp": "2025-09-10 02:20:01.847753", "step": 1965, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:20:01.878753", "step": 1965, "epoch": 1 }, { "type": "loss", "content": 0.00835806131362915, "timestamp": "2025-09-10 02:20:01.881443", "step": 1966, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:20:01.912407", "step": 1966, "epoch": 1 }, { "type": "loss", "content": 0.01073089987039566, "timestamp": "2025-09-10 02:20:01.924880", "step": 1967, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:20:01.956058", "step": 1967, "epoch": 1 }, { "type": "loss", "content": 0.0013615777716040611, "timestamp": "2025-09-10 02:20:01.980552", "step": 1968, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:20:02.011121", "step": 1968, "epoch": 1 }, { "type": "loss", "content": 0.026787450537085533, "timestamp": "2025-09-10 02:20:02.015414", "step": 1969, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:20:02.046192", "step": 1969, "epoch": 1 }, { "type": "loss", "content": 0.024200987070798874, "timestamp": "2025-09-10 02:20:02.052835", "step": 1970, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:20:02.095175", "step": 1970, "epoch": 1 }, { "type": "loss", "content": 0.022902294993400574, "timestamp": "2025-09-10 02:20:02.097512", "step": 1971, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:20:02.129584", "step": 1971, "epoch": 1 }, { "type": "loss", "content": 0.01670275256037712, "timestamp": "2025-09-10 02:20:02.162362", "step": 1972, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 144 ], "flops": 4271696270016 }, "timestamp": "2025-09-10 02:20:02.194796", "step": 1972, "epoch": 1 }, { "type": "loss", "content": 0.004363041836768389, "timestamp": "2025-09-10 02:20:02.196975", "step": 1973, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-10 02:20:02.233718", "step": 1973, "epoch": 1 }, { "type": "loss", "content": 0.007900647819042206, "timestamp": "2025-09-10 02:20:02.247731", "step": 1974, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:20:02.279427", "step": 1974, "epoch": 1 }, { "type": "loss", "content": 0.023129496723413467, "timestamp": "2025-09-10 02:20:02.283217", "step": 1975, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:20:02.314827", "step": 1975, "epoch": 1 }, { "type": "loss", "content": 0.0061828577890992165, "timestamp": "2025-09-10 02:20:02.343125", "step": 1976, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 432 ], "flops": 12814563338304 }, "timestamp": "2025-09-10 02:20:02.380946", "step": 1976, "epoch": 1 }, { "type": "loss", "content": 0.011836091056466103, "timestamp": "2025-09-10 02:20:02.396599", "step": 1977, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:20:02.427786", "step": 1977, "epoch": 1 }, { "type": "loss", "content": 0.054816387593746185, "timestamp": "2025-09-10 02:20:02.435329", "step": 1978, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:20:02.467061", "step": 1978, "epoch": 1 }, { "type": "loss", "content": 0.0033305014949291945, "timestamp": "2025-09-10 02:20:02.477014", "step": 1979, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:20:02.511551", "step": 1979, "epoch": 1 }, { "type": "loss", "content": 0.001081528840586543, "timestamp": "2025-09-10 02:20:02.539830", "step": 1980, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:20:02.580188", "step": 1980, "epoch": 1 }, { "type": "loss", "content": 0.03720958158373833, "timestamp": "2025-09-10 02:20:02.585005", "step": 1981, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:20:02.627441", "step": 1981, "epoch": 1 }, { "type": "loss", "content": 0.017248960211873055, "timestamp": "2025-09-10 02:20:02.631086", "step": 1982, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 448 ], "flops": 13289167064320 }, "timestamp": "2025-09-10 02:20:02.673598", "step": 1982, "epoch": 1 }, { "type": "loss", "content": 0.006337800528854132, "timestamp": "2025-09-10 02:20:02.689914", "step": 1983, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:20:02.724966", "step": 1983, "epoch": 1 }, { "type": "loss", "content": 0.038486216217279434, "timestamp": "2025-09-10 02:20:02.751861", "step": 1984, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:20:02.783687", "step": 1984, "epoch": 1 }, { "type": "loss", "content": 0.01467389427125454, "timestamp": "2025-09-10 02:20:02.788693", "step": 1985, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:20:02.819594", "step": 1985, "epoch": 1 }, { "type": "loss", "content": 0.01384007465094328, "timestamp": "2025-09-10 02:20:02.823988", "step": 1986, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:20:02.855334", "step": 1986, "epoch": 1 }, { "type": "loss", "content": 0.029948865994811058, "timestamp": "2025-09-10 02:20:02.862195", "step": 1987, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:20:02.893690", "step": 1987, "epoch": 1 }, { "type": "loss", "content": 0.007479586638510227, "timestamp": "2025-09-10 02:20:02.921136", "step": 1988, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:20:02.951996", "step": 1988, "epoch": 1 }, { "type": "loss", "content": 0.00857719499617815, "timestamp": "2025-09-10 02:20:02.956381", "step": 1989, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:20:02.988249", "step": 1989, "epoch": 1 }, { "type": "loss", "content": 0.012705625034868717, "timestamp": "2025-09-10 02:20:02.998080", "step": 1990, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:20:03.030003", "step": 1990, "epoch": 1 }, { "type": "loss", "content": 0.015617369674146175, "timestamp": "2025-09-10 02:20:03.036771", "step": 1991, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:20:03.068372", "step": 1991, "epoch": 1 }, { "type": "loss", "content": 0.014881722629070282, "timestamp": "2025-09-10 02:20:03.101574", "step": 1992, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:20:03.132743", "step": 1992, "epoch": 1 }, { "type": "loss", "content": 0.012684579007327557, "timestamp": "2025-09-10 02:20:03.135471", "step": 1993, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:20:03.167036", "step": 1993, "epoch": 1 }, { "type": "loss", "content": 0.00392954470589757, "timestamp": "2025-09-10 02:20:03.173954", "step": 1994, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:20:03.206365", "step": 1994, "epoch": 1 }, { "type": "loss", "content": 0.003335257526487112, "timestamp": "2025-09-10 02:20:03.213851", "step": 1995, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:20:03.244516", "step": 1995, "epoch": 1 }, { "type": "loss", "content": 0.014795198105275631, "timestamp": "2025-09-10 02:20:03.269528", "step": 1996, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:20:03.300340", "step": 1996, "epoch": 1 }, { "type": "loss", "content": 0.003911779262125492, "timestamp": "2025-09-10 02:20:03.305613", "step": 1997, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:20:03.335757", "step": 1997, "epoch": 1 }, { "type": "loss", "content": 0.0010702766012400389, "timestamp": "2025-09-10 02:20:03.346036", "step": 1998, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:20:03.378598", "step": 1998, "epoch": 1 }, { "type": "loss", "content": 0.006708834785968065, "timestamp": "2025-09-10 02:20:03.383112", "step": 1999, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:20:03.413851", "step": 1999, "epoch": 1 }, { "type": "loss", "content": 0.018123431131243706, "timestamp": "2025-09-10 02:20:03.446982", "step": 2000, "epoch": 1 }, { "type": "info", "content": "Checkpoint saved at step 2000", "timestamp": "2025-09-10 02:20:08.143067", "step": 2000, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:20:08.176414", "step": 2000, "epoch": 1 }, { "type": "loss", "content": 0.013308617286384106, "timestamp": "2025-09-10 02:20:08.184018", "step": 2001, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:20:08.216585", "step": 2001, "epoch": 1 }, { "type": "loss", "content": 0.009740256704390049, "timestamp": "2025-09-10 02:20:08.220053", "step": 2002, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:20:08.255010", "step": 2002, "epoch": 1 }, { "type": "loss", "content": 0.002783420728519559, "timestamp": "2025-09-10 02:20:08.259076", "step": 2003, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:20:08.290467", "step": 2003, "epoch": 1 }, { "type": "loss", "content": 0.016279999166727066, "timestamp": "2025-09-10 02:20:08.317646", "step": 2004, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:20:08.350363", "step": 2004, "epoch": 1 }, { "type": "loss", "content": 0.002739792922511697, "timestamp": "2025-09-10 02:20:08.360066", "step": 2005, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:20:08.391551", "step": 2005, "epoch": 1 }, { "type": "loss", "content": 0.0065587302669882774, "timestamp": "2025-09-10 02:20:08.398108", "step": 2006, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:20:08.429557", "step": 2006, "epoch": 1 }, { "type": "loss", "content": 0.03201238065958023, "timestamp": "2025-09-10 02:20:08.433423", "step": 2007, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:20:08.465425", "step": 2007, "epoch": 1 }, { "type": "loss", "content": 0.0022500918712466955, "timestamp": "2025-09-10 02:20:08.496454", "step": 2008, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:20:08.528796", "step": 2008, "epoch": 1 }, { "type": "loss", "content": 0.04503735154867172, "timestamp": "2025-09-10 02:20:08.535233", "step": 2009, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:20:08.566556", "step": 2009, "epoch": 1 }, { "type": "loss", "content": 0.0013476565945893526, "timestamp": "2025-09-10 02:20:08.574229", "step": 2010, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 144 ], "flops": 4271696270016 }, "timestamp": "2025-09-10 02:20:08.606641", "step": 2010, "epoch": 1 }, { "type": "loss", "content": 0.0042086499743163586, "timestamp": "2025-09-10 02:20:08.608893", "step": 2011, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:20:08.640311", "step": 2011, "epoch": 1 }, { "type": "loss", "content": 0.006380919367074966, "timestamp": "2025-09-10 02:20:08.667889", "step": 2012, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:20:08.699255", "step": 2012, "epoch": 1 }, { "type": "loss", "content": 0.02008945681154728, "timestamp": "2025-09-10 02:20:08.703452", "step": 2013, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:20:08.734085", "step": 2013, "epoch": 1 }, { "type": "loss", "content": 0.0637151375412941, "timestamp": "2025-09-10 02:20:08.736305", "step": 2014, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:20:08.767522", "step": 2014, "epoch": 1 }, { "type": "loss", "content": 0.0012976779835298657, "timestamp": "2025-09-10 02:20:08.771683", "step": 2015, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:20:08.802724", "step": 2015, "epoch": 1 }, { "type": "loss", "content": 0.013720971532166004, "timestamp": "2025-09-10 02:20:08.831025", "step": 2016, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:20:08.862613", "step": 2016, "epoch": 1 }, { "type": "loss", "content": 0.017362453043460846, "timestamp": "2025-09-10 02:20:08.865166", "step": 2017, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:20:08.896836", "step": 2017, "epoch": 1 }, { "type": "loss", "content": 0.005491399206221104, "timestamp": "2025-09-10 02:20:08.904248", "step": 2018, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:20:08.935110", "step": 2018, "epoch": 1 }, { "type": "loss", "content": 0.01876024715602398, "timestamp": "2025-09-10 02:20:08.939187", "step": 2019, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:20:08.972204", "step": 2019, "epoch": 1 }, { "type": "loss", "content": 0.009473063051700592, "timestamp": "2025-09-10 02:20:08.996335", "step": 2020, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:20:09.029022", "step": 2020, "epoch": 1 }, { "type": "loss", "content": 0.01576576568186283, "timestamp": "2025-09-10 02:20:09.036361", "step": 2021, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:20:09.071077", "step": 2021, "epoch": 1 }, { "type": "loss", "content": 0.030170930549502373, "timestamp": "2025-09-10 02:20:09.084825", "step": 2022, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:20:09.120147", "step": 2022, "epoch": 1 }, { "type": "loss", "content": 0.02932755835354328, "timestamp": "2025-09-10 02:20:09.133836", "step": 2023, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:20:09.165574", "step": 2023, "epoch": 1 }, { "type": "loss", "content": 0.0012282658135518432, "timestamp": "2025-09-10 02:20:09.193653", "step": 2024, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:20:09.226223", "step": 2024, "epoch": 1 }, { "type": "loss", "content": 0.006276692263782024, "timestamp": "2025-09-10 02:20:09.239234", "step": 2025, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:20:09.270967", "step": 2025, "epoch": 1 }, { "type": "loss", "content": 0.011697587557137012, "timestamp": "2025-09-10 02:20:09.278708", "step": 2026, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:20:09.311121", "step": 2026, "epoch": 1 }, { "type": "loss", "content": 0.007251562085002661, "timestamp": "2025-09-10 02:20:09.318243", "step": 2027, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:20:09.349398", "step": 2027, "epoch": 1 }, { "type": "loss", "content": 0.00808743666857481, "timestamp": "2025-09-10 02:20:09.377091", "step": 2028, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:20:09.408086", "step": 2028, "epoch": 1 }, { "type": "loss", "content": 0.0025386668276041746, "timestamp": "2025-09-10 02:20:09.410319", "step": 2029, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:20:09.441351", "step": 2029, "epoch": 1 }, { "type": "loss", "content": 0.059052709490060806, "timestamp": "2025-09-10 02:20:09.443589", "step": 2030, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:20:09.476520", "step": 2030, "epoch": 1 }, { "type": "loss", "content": 0.01944047398865223, "timestamp": "2025-09-10 02:20:09.484150", "step": 2031, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:20:09.518094", "step": 2031, "epoch": 1 }, { "type": "loss", "content": 0.01776472106575966, "timestamp": "2025-09-10 02:20:09.552272", "step": 2032, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:20:09.586355", "step": 2032, "epoch": 1 }, { "type": "loss", "content": 0.0041867028921842575, "timestamp": "2025-09-10 02:20:09.591297", "step": 2033, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:20:09.624274", "step": 2033, "epoch": 1 }, { "type": "loss", "content": 0.016866056248545647, "timestamp": "2025-09-10 02:20:09.636312", "step": 2034, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:20:09.667931", "step": 2034, "epoch": 1 }, { "type": "loss", "content": 0.009813317097723484, "timestamp": "2025-09-10 02:20:09.674821", "step": 2035, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 528 ], "flops": 15662185694400 }, "timestamp": "2025-09-10 02:20:09.720805", "step": 2035, "epoch": 1 }, { "type": "loss", "content": 0.009570365771651268, "timestamp": "2025-09-10 02:20:09.760848", "step": 2036, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:20:09.793278", "step": 2036, "epoch": 1 }, { "type": "loss", "content": 0.011103704571723938, "timestamp": "2025-09-10 02:20:09.797984", "step": 2037, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:20:09.832433", "step": 2037, "epoch": 1 }, { "type": "loss", "content": 0.016965234652161598, "timestamp": "2025-09-10 02:20:09.845793", "step": 2038, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:20:09.877310", "step": 2038, "epoch": 1 }, { "type": "loss", "content": 0.012328843586146832, "timestamp": "2025-09-10 02:20:09.883972", "step": 2039, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:20:09.915267", "step": 2039, "epoch": 1 }, { "type": "loss", "content": 0.01965012215077877, "timestamp": "2025-09-10 02:20:09.943494", "step": 2040, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:20:09.975189", "step": 2040, "epoch": 1 }, { "type": "loss", "content": 0.015359156765043736, "timestamp": "2025-09-10 02:20:09.979739", "step": 2041, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-10 02:20:10.015411", "step": 2041, "epoch": 1 }, { "type": "loss", "content": 0.025193924084305763, "timestamp": "2025-09-10 02:20:10.029190", "step": 2042, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:20:10.063910", "step": 2042, "epoch": 1 }, { "type": "loss", "content": 0.006153655704110861, "timestamp": "2025-09-10 02:20:10.070617", "step": 2043, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-10 02:20:10.109571", "step": 2043, "epoch": 1 }, { "type": "loss", "content": 0.003595164744183421, "timestamp": "2025-09-10 02:20:10.146348", "step": 2044, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:20:10.179076", "step": 2044, "epoch": 1 }, { "type": "loss", "content": 0.002315750578418374, "timestamp": "2025-09-10 02:20:10.191421", "step": 2045, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:20:10.222927", "step": 2045, "epoch": 1 }, { "type": "loss", "content": 0.01863052323460579, "timestamp": "2025-09-10 02:20:10.227211", "step": 2046, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:20:10.266149", "step": 2046, "epoch": 1 }, { "type": "loss", "content": 0.02388039045035839, "timestamp": "2025-09-10 02:20:10.270375", "step": 2047, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:20:10.302182", "step": 2047, "epoch": 1 }, { "type": "loss", "content": 0.014288315549492836, "timestamp": "2025-09-10 02:20:10.332651", "step": 2048, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:20:10.364502", "step": 2048, "epoch": 1 }, { "type": "loss", "content": 0.02336038462817669, "timestamp": "2025-09-10 02:20:10.366451", "step": 2049, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:20:10.399108", "step": 2049, "epoch": 1 }, { "type": "loss", "content": 0.03333750367164612, "timestamp": "2025-09-10 02:20:10.406210", "step": 2050, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:20:10.439798", "step": 2050, "epoch": 1 }, { "type": "loss", "content": 0.03487107530236244, "timestamp": "2025-09-10 02:20:10.453116", "step": 2051, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:20:10.488181", "step": 2051, "epoch": 1 }, { "type": "loss", "content": 0.008551175706088543, "timestamp": "2025-09-10 02:20:10.516117", "step": 2052, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:20:10.560118", "step": 2052, "epoch": 1 }, { "type": "loss", "content": 0.000608120986726135, "timestamp": "2025-09-10 02:20:10.565386", "step": 2053, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:20:10.603975", "step": 2053, "epoch": 1 }, { "type": "loss", "content": 0.0034163747914135456, "timestamp": "2025-09-10 02:20:10.614377", "step": 2054, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:20:10.651410", "step": 2054, "epoch": 1 }, { "type": "loss", "content": 0.008907620795071125, "timestamp": "2025-09-10 02:20:10.656895", "step": 2055, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:20:10.692828", "step": 2055, "epoch": 1 }, { "type": "loss", "content": 0.022446779534220695, "timestamp": "2025-09-10 02:20:10.720333", "step": 2056, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:20:10.756370", "step": 2056, "epoch": 1 }, { "type": "loss", "content": 0.021804099902510643, "timestamp": "2025-09-10 02:20:10.765348", "step": 2057, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:20:10.796437", "step": 2057, "epoch": 1 }, { "type": "loss", "content": 0.004856002051383257, "timestamp": "2025-09-10 02:20:10.806145", "step": 2058, "epoch": 1 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 736 ], "batch_size": 8, "flops": 14554433988352 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 480 ], "batch_size": 8, "flops": 9492022189824 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 544 ], "batch_size": 8, "flops": 10757625139456 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 624 ], "batch_size": 8, "flops": 12339628826496 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 576 ], "batch_size": 8, "flops": 11390426614272 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 432 ], "batch_size": 8, "flops": 8542819977600 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 432 ], "batch_size": 8, "flops": 8542819977600 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 128 ], "batch_size": 8, "flops": 2531205966848 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 544 ], "batch_size": 8, "flops": 10757625139456 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 656 ], "batch_size": 8, "flops": 12972430301312 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 128 ], "batch_size": 8, "flops": 2531205966848 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 464 ], "batch_size": 8, "flops": 9175621452416 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 560 ], "batch_size": 8, "flops": 11074025876864 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 576 ], "batch_size": 8, "flops": 11390426614272 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 1168 ], "batch_size": 8, "flops": 23097253898368 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 640 ], "batch_size": 8, "flops": 12656029563904 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 6, 208 ], "batch_size": 8, "flops": 4113209653888 } ], "timestamp": "2025-09-10 02:20:21.241490", "step": 2058, "epoch": 1 }, { "type": "pplx", "content": 17864993.387790058, "timestamp": "2025-09-10 02:20:21.246430", "step": 2058, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:20:21.279535", "step": 2058, "epoch": 1 }, { "type": "loss", "content": 0.02261553891003132, "timestamp": "2025-09-10 02:20:21.287744", "step": 2059, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:20:21.323575", "step": 2059, "epoch": 1 }, { "type": "loss", "content": 0.007157010026276112, "timestamp": "2025-09-10 02:20:21.357682", "step": 2060, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:20:21.389763", "step": 2060, "epoch": 1 }, { "type": "loss", "content": 0.026648053899407387, "timestamp": "2025-09-10 02:20:21.398755", "step": 2061, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:20:21.430854", "step": 2061, "epoch": 1 }, { "type": "loss", "content": 0.006498201750218868, "timestamp": "2025-09-10 02:20:21.437552", "step": 2062, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:20:21.469463", "step": 2062, "epoch": 1 }, { "type": "loss", "content": 0.024175258353352547, "timestamp": "2025-09-10 02:20:21.476964", "step": 2063, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:20:21.509159", "step": 2063, "epoch": 1 }, { "type": "loss", "content": 0.007673321757465601, "timestamp": "2025-09-10 02:20:21.537240", "step": 2064, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:20:21.569051", "step": 2064, "epoch": 1 }, { "type": "loss", "content": 0.005579050164669752, "timestamp": "2025-09-10 02:20:21.571289", "step": 2065, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:20:21.603339", "step": 2065, "epoch": 1 }, { "type": "loss", "content": 0.010995094664394855, "timestamp": "2025-09-10 02:20:21.610701", "step": 2066, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:20:21.643363", "step": 2066, "epoch": 1 }, { "type": "loss", "content": 0.03568677604198456, "timestamp": "2025-09-10 02:20:21.655675", "step": 2067, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:20:21.687456", "step": 2067, "epoch": 1 }, { "type": "loss", "content": 0.00576269906014204, "timestamp": "2025-09-10 02:20:21.720291", "step": 2068, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:20:21.753413", "step": 2068, "epoch": 1 }, { "type": "loss", "content": 0.014195759780704975, "timestamp": "2025-09-10 02:20:21.758422", "step": 2069, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:20:21.792123", "step": 2069, "epoch": 1 }, { "type": "loss", "content": 0.0028361105360090733, "timestamp": "2025-09-10 02:20:21.802984", "step": 2070, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:20:21.837230", "step": 2070, "epoch": 1 }, { "type": "loss", "content": 0.003535019000992179, "timestamp": "2025-09-10 02:20:21.844518", "step": 2071, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:20:21.875162", "step": 2071, "epoch": 1 }, { "type": "loss", "content": 0.006695587653666735, "timestamp": "2025-09-10 02:20:21.903471", "step": 2072, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:20:21.933981", "step": 2072, "epoch": 1 }, { "type": "loss", "content": 0.017478538677096367, "timestamp": "2025-09-10 02:20:21.938764", "step": 2073, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:20:21.969929", "step": 2073, "epoch": 1 }, { "type": "loss", "content": 0.005236539524048567, "timestamp": "2025-09-10 02:20:21.982093", "step": 2074, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:20:22.016908", "step": 2074, "epoch": 1 }, { "type": "loss", "content": 0.020143600180745125, "timestamp": "2025-09-10 02:20:22.030586", "step": 2075, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 144 ], "flops": 4271696270016 }, "timestamp": "2025-09-10 02:20:22.061615", "step": 2075, "epoch": 1 }, { "type": "loss", "content": 0.017758900299668312, "timestamp": "2025-09-10 02:20:22.085241", "step": 2076, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 144 ], "flops": 4271696270016 }, "timestamp": "2025-09-10 02:20:22.115780", "step": 2076, "epoch": 1 }, { "type": "loss", "content": 0.014581401832401752, "timestamp": "2025-09-10 02:20:22.118185", "step": 2077, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:20:22.150212", "step": 2077, "epoch": 1 }, { "type": "loss", "content": 0.0038522446993738413, "timestamp": "2025-09-10 02:20:22.157895", "step": 2078, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:20:22.191209", "step": 2078, "epoch": 1 }, { "type": "loss", "content": 0.006819657050073147, "timestamp": "2025-09-10 02:20:22.198954", "step": 2079, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 512 ], "flops": 15187581968384 }, "timestamp": "2025-09-10 02:20:22.241658", "step": 2079, "epoch": 1 }, { "type": "loss", "content": 0.0016683044377714396, "timestamp": "2025-09-10 02:20:22.280290", "step": 2080, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:20:22.311231", "step": 2080, "epoch": 1 }, { "type": "loss", "content": 0.006357488688081503, "timestamp": "2025-09-10 02:20:22.316660", "step": 2081, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:20:22.347407", "step": 2081, "epoch": 1 }, { "type": "loss", "content": 0.026342766359448433, "timestamp": "2025-09-10 02:20:22.354544", "step": 2082, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:20:22.385455", "step": 2082, "epoch": 1 }, { "type": "loss", "content": 0.021110793575644493, "timestamp": "2025-09-10 02:20:22.397683", "step": 2083, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:20:22.431408", "step": 2083, "epoch": 1 }, { "type": "loss", "content": 0.007887489162385464, "timestamp": "2025-09-10 02:20:22.456451", "step": 2084, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-10 02:20:22.493285", "step": 2084, "epoch": 1 }, { "type": "loss", "content": 0.01206361036747694, "timestamp": "2025-09-10 02:20:22.508412", "step": 2085, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:20:22.539607", "step": 2085, "epoch": 1 }, { "type": "loss", "content": 0.0012552806874737144, "timestamp": "2025-09-10 02:20:22.546422", "step": 2086, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:20:22.578638", "step": 2086, "epoch": 1 }, { "type": "loss", "content": 0.008134759962558746, "timestamp": "2025-09-10 02:20:22.585542", "step": 2087, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-10 02:20:22.620360", "step": 2087, "epoch": 1 }, { "type": "loss", "content": 0.0027143345214426517, "timestamp": "2025-09-10 02:20:22.655226", "step": 2088, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:20:22.688088", "step": 2088, "epoch": 1 }, { "type": "loss", "content": 0.0388091541826725, "timestamp": "2025-09-10 02:20:22.690414", "step": 2089, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:20:22.723556", "step": 2089, "epoch": 1 }, { "type": "loss", "content": 0.0491640567779541, "timestamp": "2025-09-10 02:20:22.734527", "step": 2090, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-10 02:20:22.772601", "step": 2090, "epoch": 1 }, { "type": "loss", "content": 0.006399436388164759, "timestamp": "2025-09-10 02:20:22.788192", "step": 2091, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:20:22.821167", "step": 2091, "epoch": 1 }, { "type": "loss", "content": 0.011510169133543968, "timestamp": "2025-09-10 02:20:22.845097", "step": 2092, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-10 02:20:22.881376", "step": 2092, "epoch": 1 }, { "type": "loss", "content": 0.07845025509595871, "timestamp": "2025-09-10 02:20:22.896561", "step": 2093, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:20:22.929539", "step": 2093, "epoch": 1 }, { "type": "loss", "content": 0.026220111176371574, "timestamp": "2025-09-10 02:20:22.936638", "step": 2094, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:20:22.968347", "step": 2094, "epoch": 1 }, { "type": "loss", "content": 0.02482834830880165, "timestamp": "2025-09-10 02:20:22.978699", "step": 2095, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:20:23.010557", "step": 2095, "epoch": 1 }, { "type": "loss", "content": 0.017762552946805954, "timestamp": "2025-09-10 02:20:23.038344", "step": 2096, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:20:23.071589", "step": 2096, "epoch": 1 }, { "type": "loss", "content": 0.0069184741005301476, "timestamp": "2025-09-10 02:20:23.078394", "step": 2097, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:20:23.110349", "step": 2097, "epoch": 1 }, { "type": "loss", "content": 0.008609072305262089, "timestamp": "2025-09-10 02:20:23.116652", "step": 2098, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:20:23.147965", "step": 2098, "epoch": 1 }, { "type": "loss", "content": 0.001422119210474193, "timestamp": "2025-09-10 02:20:23.159655", "step": 2099, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:20:23.193309", "step": 2099, "epoch": 1 }, { "type": "loss", "content": 0.006961984094232321, "timestamp": "2025-09-10 02:20:23.224650", "step": 2100, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:20:23.255497", "step": 2100, "epoch": 1 }, { "type": "loss", "content": 0.0065904962830245495, "timestamp": "2025-09-10 02:20:23.257675", "step": 2101, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:20:23.288200", "step": 2101, "epoch": 1 }, { "type": "loss", "content": 0.004559192340821028, "timestamp": "2025-09-10 02:20:23.292701", "step": 2102, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:20:23.324242", "step": 2102, "epoch": 1 }, { "type": "loss", "content": 0.01074813213199377, "timestamp": "2025-09-10 02:20:23.331808", "step": 2103, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:20:23.363551", "step": 2103, "epoch": 1 }, { "type": "loss", "content": 0.017620306462049484, "timestamp": "2025-09-10 02:20:23.395347", "step": 2104, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:20:23.427142", "step": 2104, "epoch": 1 }, { "type": "loss", "content": 0.008062370121479034, "timestamp": "2025-09-10 02:20:23.439784", "step": 2105, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:20:23.471788", "step": 2105, "epoch": 1 }, { "type": "loss", "content": 0.029557526111602783, "timestamp": "2025-09-10 02:20:23.476036", "step": 2106, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:20:23.510903", "step": 2106, "epoch": 1 }, { "type": "loss", "content": 0.014713338576257229, "timestamp": "2025-09-10 02:20:23.524282", "step": 2107, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:20:23.555121", "step": 2107, "epoch": 1 }, { "type": "loss", "content": 0.011615300551056862, "timestamp": "2025-09-10 02:20:23.580582", "step": 2108, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:20:23.611130", "step": 2108, "epoch": 1 }, { "type": "loss", "content": 0.0011783967493101954, "timestamp": "2025-09-10 02:20:23.619196", "step": 2109, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:20:23.651276", "step": 2109, "epoch": 1 }, { "type": "loss", "content": 0.006283028516918421, "timestamp": "2025-09-10 02:20:23.662054", "step": 2110, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:20:23.695199", "step": 2110, "epoch": 1 }, { "type": "loss", "content": 0.015238684602081776, "timestamp": "2025-09-10 02:20:23.708698", "step": 2111, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:20:23.740097", "step": 2111, "epoch": 1 }, { "type": "loss", "content": 0.0074634556658566, "timestamp": "2025-09-10 02:20:23.770592", "step": 2112, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:20:23.805506", "step": 2112, "epoch": 1 }, { "type": "loss", "content": 0.011353401467204094, "timestamp": "2025-09-10 02:20:23.809323", "step": 2113, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:20:23.843406", "step": 2113, "epoch": 1 }, { "type": "loss", "content": 0.021240845322608948, "timestamp": "2025-09-10 02:20:23.848788", "step": 2114, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:20:23.883014", "step": 2114, "epoch": 1 }, { "type": "loss", "content": 0.018865080550312996, "timestamp": "2025-09-10 02:20:23.888498", "step": 2115, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:20:23.922213", "step": 2115, "epoch": 1 }, { "type": "loss", "content": 0.019352329894900322, "timestamp": "2025-09-10 02:20:23.949465", "step": 2116, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:20:23.983621", "step": 2116, "epoch": 1 }, { "type": "loss", "content": 0.01287752389907837, "timestamp": "2025-09-10 02:20:23.986675", "step": 2117, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:20:24.020402", "step": 2117, "epoch": 1 }, { "type": "loss", "content": 0.03353200852870941, "timestamp": "2025-09-10 02:20:24.023711", "step": 2118, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 144 ], "flops": 4271696270016 }, "timestamp": "2025-09-10 02:20:24.057404", "step": 2118, "epoch": 1 }, { "type": "loss", "content": 0.0233648419380188, "timestamp": "2025-09-10 02:20:24.059721", "step": 2119, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:20:24.092936", "step": 2119, "epoch": 1 }, { "type": "loss", "content": 0.03482295200228691, "timestamp": "2025-09-10 02:20:24.125226", "step": 2120, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:20:24.156601", "step": 2120, "epoch": 1 }, { "type": "loss", "content": 0.02343512885272503, "timestamp": "2025-09-10 02:20:24.158914", "step": 2121, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:20:24.190199", "step": 2121, "epoch": 1 }, { "type": "loss", "content": 0.022608637809753418, "timestamp": "2025-09-10 02:20:24.197547", "step": 2122, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:20:24.228337", "step": 2122, "epoch": 1 }, { "type": "loss", "content": 0.0130180474370718, "timestamp": "2025-09-10 02:20:24.238347", "step": 2123, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:20:24.269288", "step": 2123, "epoch": 1 }, { "type": "loss", "content": 0.014483482576906681, "timestamp": "2025-09-10 02:20:24.296619", "step": 2124, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:20:24.328265", "step": 2124, "epoch": 1 }, { "type": "loss", "content": 0.05478040128946304, "timestamp": "2025-09-10 02:20:24.336772", "step": 2125, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:20:24.367265", "step": 2125, "epoch": 1 }, { "type": "loss", "content": 0.00959568191319704, "timestamp": "2025-09-10 02:20:24.374201", "step": 2126, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:20:24.406908", "step": 2126, "epoch": 1 }, { "type": "loss", "content": 0.012538508512079716, "timestamp": "2025-09-10 02:20:24.411278", "step": 2127, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:20:24.442470", "step": 2127, "epoch": 1 }, { "type": "loss", "content": 0.0026257862336933613, "timestamp": "2025-09-10 02:20:24.473702", "step": 2128, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:20:24.503894", "step": 2128, "epoch": 1 }, { "type": "loss", "content": 0.018345599994063377, "timestamp": "2025-09-10 02:20:24.506139", "step": 2129, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:20:24.537211", "step": 2129, "epoch": 1 }, { "type": "loss", "content": 0.014367531053721905, "timestamp": "2025-09-10 02:20:24.544118", "step": 2130, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:20:24.583299", "step": 2130, "epoch": 1 }, { "type": "loss", "content": 0.0052671851590275764, "timestamp": "2025-09-10 02:20:24.587542", "step": 2131, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:20:24.628092", "step": 2131, "epoch": 1 }, { "type": "loss", "content": 0.017293930053710938, "timestamp": "2025-09-10 02:20:24.659286", "step": 2132, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:20:24.691742", "step": 2132, "epoch": 1 }, { "type": "loss", "content": 0.0051933168433606625, "timestamp": "2025-09-10 02:20:24.702182", "step": 2133, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:20:24.733405", "step": 2133, "epoch": 1 }, { "type": "loss", "content": 0.008713253773748875, "timestamp": "2025-09-10 02:20:24.740295", "step": 2134, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:20:24.771556", "step": 2134, "epoch": 1 }, { "type": "loss", "content": 0.010111020877957344, "timestamp": "2025-09-10 02:20:24.783335", "step": 2135, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-10 02:20:24.821841", "step": 2135, "epoch": 1 }, { "type": "loss", "content": 0.014230245724320412, "timestamp": "2025-09-10 02:20:24.858442", "step": 2136, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-10 02:20:24.892148", "step": 2136, "epoch": 1 }, { "type": "loss", "content": 0.013335539028048515, "timestamp": "2025-09-10 02:20:24.905294", "step": 2137, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:20:24.936397", "step": 2137, "epoch": 1 }, { "type": "loss", "content": 0.018712077289819717, "timestamp": "2025-09-10 02:20:24.943181", "step": 2138, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:20:24.976717", "step": 2138, "epoch": 1 }, { "type": "loss", "content": 0.028135867789387703, "timestamp": "2025-09-10 02:20:24.990073", "step": 2139, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:20:25.021402", "step": 2139, "epoch": 1 }, { "type": "loss", "content": 0.014833973720669746, "timestamp": "2025-09-10 02:20:25.049743", "step": 2140, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:20:25.079948", "step": 2140, "epoch": 1 }, { "type": "loss", "content": 0.005817878060042858, "timestamp": "2025-09-10 02:20:25.082067", "step": 2141, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:20:25.112835", "step": 2141, "epoch": 1 }, { "type": "loss", "content": 0.00543214799836278, "timestamp": "2025-09-10 02:20:25.123106", "step": 2142, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:20:25.153958", "step": 2142, "epoch": 1 }, { "type": "loss", "content": 0.0051437122747302055, "timestamp": "2025-09-10 02:20:25.164268", "step": 2143, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:20:25.198938", "step": 2143, "epoch": 1 }, { "type": "loss", "content": 0.004253858234733343, "timestamp": "2025-09-10 02:20:25.233271", "step": 2144, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:20:25.264621", "step": 2144, "epoch": 1 }, { "type": "loss", "content": 0.015032351948320866, "timestamp": "2025-09-10 02:20:25.269699", "step": 2145, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:20:25.300931", "step": 2145, "epoch": 1 }, { "type": "loss", "content": 0.010134616866707802, "timestamp": "2025-09-10 02:20:25.313334", "step": 2146, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:20:25.344866", "step": 2146, "epoch": 1 }, { "type": "loss", "content": 0.003961100243031979, "timestamp": "2025-09-10 02:20:25.352268", "step": 2147, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:20:25.383479", "step": 2147, "epoch": 1 }, { "type": "loss", "content": 0.014108018949627876, "timestamp": "2025-09-10 02:20:25.411763", "step": 2148, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:20:25.442846", "step": 2148, "epoch": 1 }, { "type": "loss", "content": 0.0111985569819808, "timestamp": "2025-09-10 02:20:25.447948", "step": 2149, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 144 ], "flops": 4271696270016 }, "timestamp": "2025-09-10 02:20:25.477822", "step": 2149, "epoch": 1 }, { "type": "loss", "content": 0.02083497866988182, "timestamp": "2025-09-10 02:20:25.480008", "step": 2150, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-10 02:20:25.514467", "step": 2150, "epoch": 1 }, { "type": "loss", "content": 0.005785502027720213, "timestamp": "2025-09-10 02:20:25.528509", "step": 2151, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:20:25.559732", "step": 2151, "epoch": 1 }, { "type": "loss", "content": 0.008959316648542881, "timestamp": "2025-09-10 02:20:25.587452", "step": 2152, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:20:25.618341", "step": 2152, "epoch": 1 }, { "type": "loss", "content": 0.008241984061896801, "timestamp": "2025-09-10 02:20:25.628840", "step": 2153, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:20:25.659730", "step": 2153, "epoch": 1 }, { "type": "loss", "content": 0.006875708233565092, "timestamp": "2025-09-10 02:20:25.667335", "step": 2154, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:20:25.701679", "step": 2154, "epoch": 1 }, { "type": "loss", "content": 0.0037335853558033705, "timestamp": "2025-09-10 02:20:25.709348", "step": 2155, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:20:25.740435", "step": 2155, "epoch": 1 }, { "type": "loss", "content": 0.020355254411697388, "timestamp": "2025-09-10 02:20:25.768161", "step": 2156, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-10 02:20:25.801811", "step": 2156, "epoch": 1 }, { "type": "loss", "content": 0.009400454349815845, "timestamp": "2025-09-10 02:20:25.814913", "step": 2157, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:20:25.846853", "step": 2157, "epoch": 1 }, { "type": "loss", "content": 0.006258614361286163, "timestamp": "2025-09-10 02:20:25.853851", "step": 2158, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:20:25.884388", "step": 2158, "epoch": 1 }, { "type": "loss", "content": 0.008656726218760014, "timestamp": "2025-09-10 02:20:25.891425", "step": 2159, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:20:25.921831", "step": 2159, "epoch": 1 }, { "type": "loss", "content": 0.004763355012983084, "timestamp": "2025-09-10 02:20:25.949770", "step": 2160, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:20:25.981370", "step": 2160, "epoch": 1 }, { "type": "loss", "content": 0.008526794612407684, "timestamp": "2025-09-10 02:20:25.991080", "step": 2161, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:20:26.022544", "step": 2161, "epoch": 1 }, { "type": "loss", "content": 0.055472858250141144, "timestamp": "2025-09-10 02:20:26.029601", "step": 2162, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:20:26.061022", "step": 2162, "epoch": 1 }, { "type": "loss", "content": 0.0049760532565414906, "timestamp": "2025-09-10 02:20:26.065071", "step": 2163, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-10 02:20:26.100965", "step": 2163, "epoch": 1 }, { "type": "loss", "content": 0.02341938205063343, "timestamp": "2025-09-10 02:20:26.135653", "step": 2164, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:20:26.167456", "step": 2164, "epoch": 1 }, { "type": "loss", "content": 0.01208476722240448, "timestamp": "2025-09-10 02:20:26.172621", "step": 2165, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:20:26.204912", "step": 2165, "epoch": 1 }, { "type": "loss", "content": 0.005906891077756882, "timestamp": "2025-09-10 02:20:26.215000", "step": 2166, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:20:26.246522", "step": 2166, "epoch": 1 }, { "type": "loss", "content": 0.001993507379665971, "timestamp": "2025-09-10 02:20:26.256675", "step": 2167, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:20:26.288004", "step": 2167, "epoch": 1 }, { "type": "loss", "content": 0.0028882776387035847, "timestamp": "2025-09-10 02:20:26.319213", "step": 2168, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:20:26.350083", "step": 2168, "epoch": 1 }, { "type": "loss", "content": 0.0018221037462353706, "timestamp": "2025-09-10 02:20:26.352302", "step": 2169, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:20:26.383804", "step": 2169, "epoch": 1 }, { "type": "loss", "content": 0.012003413401544094, "timestamp": "2025-09-10 02:20:26.390523", "step": 2170, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:20:26.421987", "step": 2170, "epoch": 1 }, { "type": "loss", "content": 0.005830215755850077, "timestamp": "2025-09-10 02:20:26.432042", "step": 2171, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:20:26.463153", "step": 2171, "epoch": 1 }, { "type": "loss", "content": 0.025055332109332085, "timestamp": "2025-09-10 02:20:26.490728", "step": 2172, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:20:26.522171", "step": 2172, "epoch": 1 }, { "type": "loss", "content": 0.020759152248501778, "timestamp": "2025-09-10 02:20:26.527153", "step": 2173, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:20:26.558543", "step": 2173, "epoch": 1 }, { "type": "loss", "content": 0.0036895188968628645, "timestamp": "2025-09-10 02:20:26.565459", "step": 2174, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:20:26.599427", "step": 2174, "epoch": 1 }, { "type": "loss", "content": 0.021146830171346664, "timestamp": "2025-09-10 02:20:26.610450", "step": 2175, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:20:26.642089", "step": 2175, "epoch": 1 }, { "type": "loss", "content": 0.03294152021408081, "timestamp": "2025-09-10 02:20:26.675459", "step": 2176, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:20:26.707514", "step": 2176, "epoch": 1 }, { "type": "loss", "content": 0.028937892988324165, "timestamp": "2025-09-10 02:20:26.709724", "step": 2177, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:20:26.741051", "step": 2177, "epoch": 1 }, { "type": "loss", "content": 0.005948805715888739, "timestamp": "2025-09-10 02:20:26.743695", "step": 2178, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:20:26.775324", "step": 2178, "epoch": 1 }, { "type": "loss", "content": 0.014749663881957531, "timestamp": "2025-09-10 02:20:26.778142", "step": 2179, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:20:26.814127", "step": 2179, "epoch": 1 }, { "type": "loss", "content": 0.022330453619360924, "timestamp": "2025-09-10 02:20:26.848408", "step": 2180, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:20:26.880605", "step": 2180, "epoch": 1 }, { "type": "loss", "content": 0.009073344990611076, "timestamp": "2025-09-10 02:20:26.885273", "step": 2181, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:20:26.917146", "step": 2181, "epoch": 1 }, { "type": "loss", "content": 0.00725422753021121, "timestamp": "2025-09-10 02:20:26.924616", "step": 2182, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:20:26.955341", "step": 2182, "epoch": 1 }, { "type": "loss", "content": 0.03919028118252754, "timestamp": "2025-09-10 02:20:26.959402", "step": 2183, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:20:26.990765", "step": 2183, "epoch": 1 }, { "type": "loss", "content": 0.03313456103205681, "timestamp": "2025-09-10 02:20:27.021647", "step": 2184, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:20:27.052764", "step": 2184, "epoch": 1 }, { "type": "loss", "content": 0.023840798065066338, "timestamp": "2025-09-10 02:20:27.055205", "step": 2185, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:20:27.087124", "step": 2185, "epoch": 1 }, { "type": "loss", "content": 0.008154499344527721, "timestamp": "2025-09-10 02:20:27.091545", "step": 2186, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-10 02:20:27.125598", "step": 2186, "epoch": 1 }, { "type": "loss", "content": 0.021896088495850563, "timestamp": "2025-09-10 02:20:27.139435", "step": 2187, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:20:27.170364", "step": 2187, "epoch": 1 }, { "type": "loss", "content": 0.02253473922610283, "timestamp": "2025-09-10 02:20:27.198062", "step": 2188, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:20:27.229184", "step": 2188, "epoch": 1 }, { "type": "loss", "content": 0.020850541070103645, "timestamp": "2025-09-10 02:20:27.231530", "step": 2189, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:20:27.263642", "step": 2189, "epoch": 1 }, { "type": "loss", "content": 0.01595452055335045, "timestamp": "2025-09-10 02:20:27.271524", "step": 2190, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 464 ], "flops": 13763770790336 }, "timestamp": "2025-09-10 02:20:27.313229", "step": 2190, "epoch": 1 }, { "type": "loss", "content": 0.014829051680862904, "timestamp": "2025-09-10 02:20:27.330363", "step": 2191, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:20:27.361318", "step": 2191, "epoch": 1 }, { "type": "loss", "content": 0.004625355359166861, "timestamp": "2025-09-10 02:20:27.385169", "step": 2192, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:20:27.418130", "step": 2192, "epoch": 1 }, { "type": "loss", "content": 0.004544160328805447, "timestamp": "2025-09-10 02:20:27.422481", "step": 2193, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:20:27.454944", "step": 2193, "epoch": 1 }, { "type": "loss", "content": 0.004428909160196781, "timestamp": "2025-09-10 02:20:27.467282", "step": 2194, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:20:27.499428", "step": 2194, "epoch": 1 }, { "type": "loss", "content": 0.002593469340354204, "timestamp": "2025-09-10 02:20:27.506749", "step": 2195, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:20:27.538937", "step": 2195, "epoch": 1 }, { "type": "loss", "content": 0.0045441207475960255, "timestamp": "2025-09-10 02:20:27.570800", "step": 2196, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:20:27.603970", "step": 2196, "epoch": 1 }, { "type": "loss", "content": 0.0163312666118145, "timestamp": "2025-09-10 02:20:27.606468", "step": 2197, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:20:27.638454", "step": 2197, "epoch": 1 }, { "type": "loss", "content": 0.008555804379284382, "timestamp": "2025-09-10 02:20:27.641211", "step": 2198, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 144 ], "flops": 4271696270016 }, "timestamp": "2025-09-10 02:20:27.672756", "step": 2198, "epoch": 1 }, { "type": "loss", "content": 0.008500153198838234, "timestamp": "2025-09-10 02:20:27.675218", "step": 2199, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:20:27.707569", "step": 2199, "epoch": 1 }, { "type": "loss", "content": 0.006550853606313467, "timestamp": "2025-09-10 02:20:27.732976", "step": 2200, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:20:27.764385", "step": 2200, "epoch": 1 }, { "type": "loss", "content": 0.013699221424758434, "timestamp": "2025-09-10 02:20:27.766855", "step": 2201, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:20:27.797602", "step": 2201, "epoch": 1 }, { "type": "loss", "content": 0.008543393574655056, "timestamp": "2025-09-10 02:20:27.805319", "step": 2202, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:20:27.837410", "step": 2202, "epoch": 1 }, { "type": "loss", "content": 0.01858612895011902, "timestamp": "2025-09-10 02:20:27.844392", "step": 2203, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:20:27.875489", "step": 2203, "epoch": 1 }, { "type": "loss", "content": 0.01539852935820818, "timestamp": "2025-09-10 02:20:27.900629", "step": 2204, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:20:27.931630", "step": 2204, "epoch": 1 }, { "type": "loss", "content": 0.0037464885972440243, "timestamp": "2025-09-10 02:20:27.936471", "step": 2205, "epoch": 1 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 736 ], "batch_size": 8, "flops": 14554433988352 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 480 ], "batch_size": 8, "flops": 9492022189824 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 544 ], "batch_size": 8, "flops": 10757625139456 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 624 ], "batch_size": 8, "flops": 12339628826496 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 576 ], "batch_size": 8, "flops": 11390426614272 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 432 ], "batch_size": 8, "flops": 8542819977600 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 432 ], "batch_size": 8, "flops": 8542819977600 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 128 ], "batch_size": 8, "flops": 2531205966848 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 544 ], "batch_size": 8, "flops": 10757625139456 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 656 ], "batch_size": 8, "flops": 12972430301312 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 128 ], "batch_size": 8, "flops": 2531205966848 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 464 ], "batch_size": 8, "flops": 9175621452416 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 560 ], "batch_size": 8, "flops": 11074025876864 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 576 ], "batch_size": 8, "flops": 11390426614272 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 1168 ], "batch_size": 8, "flops": 23097253898368 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 640 ], "batch_size": 8, "flops": 12656029563904 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 6, 208 ], "batch_size": 8, "flops": 4113209653888 } ], "timestamp": "2025-09-10 02:20:38.286390", "step": 2205, "epoch": 1 }, { "type": "pplx", "content": 17307864.411721557, "timestamp": "2025-09-10 02:20:38.289200", "step": 2205, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:20:38.319658", "step": 2205, "epoch": 1 }, { "type": "loss", "content": 0.03866380453109741, "timestamp": "2025-09-10 02:20:38.330198", "step": 2206, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:20:38.363594", "step": 2206, "epoch": 1 }, { "type": "loss", "content": 0.0060219233855605125, "timestamp": "2025-09-10 02:20:38.370493", "step": 2207, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:20:38.402011", "step": 2207, "epoch": 1 }, { "type": "loss", "content": 0.01440694835036993, "timestamp": "2025-09-10 02:20:38.433053", "step": 2208, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:20:38.465167", "step": 2208, "epoch": 1 }, { "type": "loss", "content": 0.01927160657942295, "timestamp": "2025-09-10 02:20:38.469613", "step": 2209, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:20:38.500179", "step": 2209, "epoch": 1 }, { "type": "loss", "content": 0.011063886806368828, "timestamp": "2025-09-10 02:20:38.507667", "step": 2210, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:20:38.538872", "step": 2210, "epoch": 1 }, { "type": "loss", "content": 0.01112239807844162, "timestamp": "2025-09-10 02:20:38.546466", "step": 2211, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:20:38.578628", "step": 2211, "epoch": 1 }, { "type": "loss", "content": 0.007352620828896761, "timestamp": "2025-09-10 02:20:38.606233", "step": 2212, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:20:38.638343", "step": 2212, "epoch": 1 }, { "type": "loss", "content": 0.004721261560916901, "timestamp": "2025-09-10 02:20:38.643046", "step": 2213, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:20:38.673567", "step": 2213, "epoch": 1 }, { "type": "loss", "content": 0.0031937905587255955, "timestamp": "2025-09-10 02:20:38.680945", "step": 2214, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:20:38.712648", "step": 2214, "epoch": 1 }, { "type": "loss", "content": 0.027609581127762794, "timestamp": "2025-09-10 02:20:38.723411", "step": 2215, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:20:38.754538", "step": 2215, "epoch": 1 }, { "type": "loss", "content": 0.030603447929024696, "timestamp": "2025-09-10 02:20:38.783175", "step": 2216, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:20:38.814814", "step": 2216, "epoch": 1 }, { "type": "loss", "content": 0.037538252770900726, "timestamp": "2025-09-10 02:20:38.820166", "step": 2217, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:20:38.851234", "step": 2217, "epoch": 1 }, { "type": "loss", "content": 0.01803760603070259, "timestamp": "2025-09-10 02:20:38.855760", "step": 2218, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:20:38.886704", "step": 2218, "epoch": 1 }, { "type": "loss", "content": 0.0046121361665427685, "timestamp": "2025-09-10 02:20:38.893551", "step": 2219, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:20:38.924971", "step": 2219, "epoch": 1 }, { "type": "loss", "content": 0.011201003566384315, "timestamp": "2025-09-10 02:20:38.957648", "step": 2220, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:20:38.989597", "step": 2220, "epoch": 1 }, { "type": "loss", "content": 0.02293272875249386, "timestamp": "2025-09-10 02:20:38.994574", "step": 2221, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:20:39.025660", "step": 2221, "epoch": 1 }, { "type": "loss", "content": 0.010559617541730404, "timestamp": "2025-09-10 02:20:39.033153", "step": 2222, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:20:39.064015", "step": 2222, "epoch": 1 }, { "type": "loss", "content": 0.0019281571730971336, "timestamp": "2025-09-10 02:20:39.070877", "step": 2223, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:20:39.102032", "step": 2223, "epoch": 1 }, { "type": "loss", "content": 0.0065590995363891125, "timestamp": "2025-09-10 02:20:39.134526", "step": 2224, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:20:39.166870", "step": 2224, "epoch": 1 }, { "type": "loss", "content": 0.0038079393561929464, "timestamp": "2025-09-10 02:20:39.174362", "step": 2225, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:20:39.207567", "step": 2225, "epoch": 1 }, { "type": "loss", "content": 0.015453227795660496, "timestamp": "2025-09-10 02:20:39.219510", "step": 2226, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:20:39.250961", "step": 2226, "epoch": 1 }, { "type": "loss", "content": 0.004905925132334232, "timestamp": "2025-09-10 02:20:39.255352", "step": 2227, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:20:39.286245", "step": 2227, "epoch": 1 }, { "type": "loss", "content": 0.001167826121672988, "timestamp": "2025-09-10 02:20:39.314110", "step": 2228, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 144 ], "flops": 4271696270016 }, "timestamp": "2025-09-10 02:20:39.344406", "step": 2228, "epoch": 1 }, { "type": "loss", "content": 0.013551952317357063, "timestamp": "2025-09-10 02:20:39.346565", "step": 2229, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:20:39.377987", "step": 2229, "epoch": 1 }, { "type": "loss", "content": 0.005024695303291082, "timestamp": "2025-09-10 02:20:39.384822", "step": 2230, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 480 ], "flops": 14238374516352 }, "timestamp": "2025-09-10 02:20:39.427690", "step": 2230, "epoch": 1 }, { "type": "loss", "content": 0.025890955701470375, "timestamp": "2025-09-10 02:20:39.445135", "step": 2231, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:20:39.479800", "step": 2231, "epoch": 1 }, { "type": "loss", "content": 0.005515389610081911, "timestamp": "2025-09-10 02:20:39.511039", "step": 2232, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:20:39.542422", "step": 2232, "epoch": 1 }, { "type": "loss", "content": 0.015928935259580612, "timestamp": "2025-09-10 02:20:39.547540", "step": 2233, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:20:39.580999", "step": 2233, "epoch": 1 }, { "type": "loss", "content": 0.0011972986394539475, "timestamp": "2025-09-10 02:20:39.588232", "step": 2234, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:20:39.622143", "step": 2234, "epoch": 1 }, { "type": "loss", "content": 0.00578897912055254, "timestamp": "2025-09-10 02:20:39.625930", "step": 2235, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-10 02:20:39.664400", "step": 2235, "epoch": 1 }, { "type": "loss", "content": 0.0018228079425171018, "timestamp": "2025-09-10 02:20:39.701132", "step": 2236, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:20:39.731827", "step": 2236, "epoch": 1 }, { "type": "loss", "content": 0.0009967860532924533, "timestamp": "2025-09-10 02:20:39.734081", "step": 2237, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:20:39.764834", "step": 2237, "epoch": 1 }, { "type": "loss", "content": 0.027059337124228477, "timestamp": "2025-09-10 02:20:39.769194", "step": 2238, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:20:39.800765", "step": 2238, "epoch": 1 }, { "type": "loss", "content": 0.02666650339961052, "timestamp": "2025-09-10 02:20:39.808311", "step": 2239, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:20:39.840159", "step": 2239, "epoch": 1 }, { "type": "loss", "content": 0.024572152644395828, "timestamp": "2025-09-10 02:20:39.871790", "step": 2240, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:20:39.903716", "step": 2240, "epoch": 1 }, { "type": "loss", "content": 0.006256606429815292, "timestamp": "2025-09-10 02:20:39.908708", "step": 2241, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:20:39.943990", "step": 2241, "epoch": 1 }, { "type": "loss", "content": 0.009752501733601093, "timestamp": "2025-09-10 02:20:39.957370", "step": 2242, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 144 ], "flops": 4271696270016 }, "timestamp": "2025-09-10 02:20:39.988230", "step": 2242, "epoch": 1 }, { "type": "loss", "content": 0.0497528612613678, "timestamp": "2025-09-10 02:20:39.990533", "step": 2243, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:20:40.022012", "step": 2243, "epoch": 1 }, { "type": "loss", "content": 0.010288014076650143, "timestamp": "2025-09-10 02:20:40.049924", "step": 2244, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:20:40.082626", "step": 2244, "epoch": 1 }, { "type": "loss", "content": 0.004436141811311245, "timestamp": "2025-09-10 02:20:40.087296", "step": 2245, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 496 ], "flops": 14712978242368 }, "timestamp": "2025-09-10 02:20:40.129627", "step": 2245, "epoch": 1 }, { "type": "loss", "content": 0.016879761591553688, "timestamp": "2025-09-10 02:20:40.147151", "step": 2246, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:20:40.179232", "step": 2246, "epoch": 1 }, { "type": "loss", "content": 0.005393616855144501, "timestamp": "2025-09-10 02:20:40.189267", "step": 2247, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:20:40.220928", "step": 2247, "epoch": 1 }, { "type": "loss", "content": 0.006263823714107275, "timestamp": "2025-09-10 02:20:40.251889", "step": 2248, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:20:40.284453", "step": 2248, "epoch": 1 }, { "type": "loss", "content": 0.0019508127588778734, "timestamp": "2025-09-10 02:20:40.288974", "step": 2249, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 144 ], "flops": 4271696270016 }, "timestamp": "2025-09-10 02:20:40.321075", "step": 2249, "epoch": 1 }, { "type": "loss", "content": 0.016456058248877525, "timestamp": "2025-09-10 02:20:40.323232", "step": 2250, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:20:40.354189", "step": 2250, "epoch": 1 }, { "type": "loss", "content": 0.0015526397619396448, "timestamp": "2025-09-10 02:20:40.361701", "step": 2251, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-10 02:20:40.401026", "step": 2251, "epoch": 1 }, { "type": "loss", "content": 0.002204819582402706, "timestamp": "2025-09-10 02:20:40.437770", "step": 2252, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:20:40.469534", "step": 2252, "epoch": 1 }, { "type": "loss", "content": 0.0038875446189194918, "timestamp": "2025-09-10 02:20:40.477752", "step": 2253, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:20:40.509005", "step": 2253, "epoch": 1 }, { "type": "loss", "content": 0.003834874602034688, "timestamp": "2025-09-10 02:20:40.515829", "step": 2254, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:20:40.546182", "step": 2254, "epoch": 1 }, { "type": "loss", "content": 0.007351601030677557, "timestamp": "2025-09-10 02:20:40.553690", "step": 2255, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:20:40.585632", "step": 2255, "epoch": 1 }, { "type": "loss", "content": 0.018372252583503723, "timestamp": "2025-09-10 02:20:40.614197", "step": 2256, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-10 02:20:40.647153", "step": 2256, "epoch": 1 }, { "type": "loss", "content": 0.013634584844112396, "timestamp": "2025-09-10 02:20:40.660245", "step": 2257, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:20:40.691001", "step": 2257, "epoch": 1 }, { "type": "loss", "content": 0.008646705187857151, "timestamp": "2025-09-10 02:20:40.697964", "step": 2258, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:20:40.729949", "step": 2258, "epoch": 1 }, { "type": "loss", "content": 0.008055547252297401, "timestamp": "2025-09-10 02:20:40.737425", "step": 2259, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:20:40.769504", "step": 2259, "epoch": 1 }, { "type": "loss", "content": 0.019878502935171127, "timestamp": "2025-09-10 02:20:40.796993", "step": 2260, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:20:40.830990", "step": 2260, "epoch": 1 }, { "type": "loss", "content": 0.007829791866242886, "timestamp": "2025-09-10 02:20:40.837243", "step": 2261, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:20:40.868790", "step": 2261, "epoch": 1 }, { "type": "loss", "content": 0.01760595664381981, "timestamp": "2025-09-10 02:20:40.872455", "step": 2262, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-10 02:20:40.910484", "step": 2262, "epoch": 1 }, { "type": "loss", "content": 0.0019513925071805716, "timestamp": "2025-09-10 02:20:40.926123", "step": 2263, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:20:40.957429", "step": 2263, "epoch": 1 }, { "type": "loss", "content": 0.0019971609581261873, "timestamp": "2025-09-10 02:20:40.985426", "step": 2264, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:20:41.016592", "step": 2264, "epoch": 1 }, { "type": "loss", "content": 0.006590723525732756, "timestamp": "2025-09-10 02:20:41.024702", "step": 2265, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:20:41.058410", "step": 2265, "epoch": 1 }, { "type": "loss", "content": 0.0263630710542202, "timestamp": "2025-09-10 02:20:41.071772", "step": 2266, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:20:41.103787", "step": 2266, "epoch": 1 }, { "type": "loss", "content": 0.003090274054557085, "timestamp": "2025-09-10 02:20:41.115712", "step": 2267, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:20:41.146821", "step": 2267, "epoch": 1 }, { "type": "loss", "content": 0.01568448916077614, "timestamp": "2025-09-10 02:20:41.174954", "step": 2268, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:20:41.206104", "step": 2268, "epoch": 1 }, { "type": "loss", "content": 0.0363653302192688, "timestamp": "2025-09-10 02:20:41.215843", "step": 2269, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:20:41.247801", "step": 2269, "epoch": 1 }, { "type": "loss", "content": 0.014266527257859707, "timestamp": "2025-09-10 02:20:41.254956", "step": 2270, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:20:41.288532", "step": 2270, "epoch": 1 }, { "type": "loss", "content": 0.0015003466978669167, "timestamp": "2025-09-10 02:20:41.301902", "step": 2271, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:20:41.333348", "step": 2271, "epoch": 1 }, { "type": "loss", "content": 0.0023136555682867765, "timestamp": "2025-09-10 02:20:41.366712", "step": 2272, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:20:41.400192", "step": 2272, "epoch": 1 }, { "type": "loss", "content": 0.021259073168039322, "timestamp": "2025-09-10 02:20:41.404442", "step": 2273, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 144 ], "flops": 4271696270016 }, "timestamp": "2025-09-10 02:20:41.436347", "step": 2273, "epoch": 1 }, { "type": "loss", "content": 0.0009670493309386075, "timestamp": "2025-09-10 02:20:41.438630", "step": 2274, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:20:41.469990", "step": 2274, "epoch": 1 }, { "type": "loss", "content": 0.02005203254520893, "timestamp": "2025-09-10 02:20:41.472485", "step": 2275, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:20:41.504586", "step": 2275, "epoch": 1 }, { "type": "loss", "content": 0.0043064602650702, "timestamp": "2025-09-10 02:20:41.532548", "step": 2276, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:20:41.563391", "step": 2276, "epoch": 1 }, { "type": "loss", "content": 0.01462549064308405, "timestamp": "2025-09-10 02:20:41.568013", "step": 2277, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:20:41.599710", "step": 2277, "epoch": 1 }, { "type": "loss", "content": 0.01992652751505375, "timestamp": "2025-09-10 02:20:41.606950", "step": 2278, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:20:41.638093", "step": 2278, "epoch": 1 }, { "type": "loss", "content": 0.0108196334913373, "timestamp": "2025-09-10 02:20:41.645374", "step": 2279, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:20:41.676660", "step": 2279, "epoch": 1 }, { "type": "loss", "content": 0.001470404677093029, "timestamp": "2025-09-10 02:20:41.707742", "step": 2280, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:20:41.737903", "step": 2280, "epoch": 1 }, { "type": "loss", "content": 0.004251373466104269, "timestamp": "2025-09-10 02:20:41.742522", "step": 2281, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-10 02:20:41.777090", "step": 2281, "epoch": 1 }, { "type": "loss", "content": 0.009216892533004284, "timestamp": "2025-09-10 02:20:41.791186", "step": 2282, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-10 02:20:41.826307", "step": 2282, "epoch": 1 }, { "type": "loss", "content": 0.020200418308377266, "timestamp": "2025-09-10 02:20:41.840250", "step": 2283, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:20:41.870676", "step": 2283, "epoch": 1 }, { "type": "loss", "content": 0.0011728627141565084, "timestamp": "2025-09-10 02:20:41.895569", "step": 2284, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-10 02:20:41.927789", "step": 2284, "epoch": 1 }, { "type": "loss", "content": 0.004072748590260744, "timestamp": "2025-09-10 02:20:41.940922", "step": 2285, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:20:41.971763", "step": 2285, "epoch": 1 }, { "type": "loss", "content": 0.03455633297562599, "timestamp": "2025-09-10 02:20:41.978289", "step": 2286, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:20:42.012708", "step": 2286, "epoch": 1 }, { "type": "loss", "content": 0.01204759068787098, "timestamp": "2025-09-10 02:20:42.020129", "step": 2287, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:20:42.051484", "step": 2287, "epoch": 1 }, { "type": "loss", "content": 0.0008365390822291374, "timestamp": "2025-09-10 02:20:42.076075", "step": 2288, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:20:42.108564", "step": 2288, "epoch": 1 }, { "type": "loss", "content": 0.04001276567578316, "timestamp": "2025-09-10 02:20:42.112886", "step": 2289, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:20:42.143380", "step": 2289, "epoch": 1 }, { "type": "loss", "content": 0.007685269229114056, "timestamp": "2025-09-10 02:20:42.147494", "step": 2290, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:20:42.178433", "step": 2290, "epoch": 1 }, { "type": "loss", "content": 0.0020888035651296377, "timestamp": "2025-09-10 02:20:42.191101", "step": 2291, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:20:42.222633", "step": 2291, "epoch": 1 }, { "type": "loss", "content": 0.0040938640013337135, "timestamp": "2025-09-10 02:20:42.250470", "step": 2292, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:20:42.281635", "step": 2292, "epoch": 1 }, { "type": "loss", "content": 0.001731925061903894, "timestamp": "2025-09-10 02:20:42.286685", "step": 2293, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:20:42.317261", "step": 2293, "epoch": 1 }, { "type": "loss", "content": 0.020662318915128708, "timestamp": "2025-09-10 02:20:42.327342", "step": 2294, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:20:42.358388", "step": 2294, "epoch": 1 }, { "type": "loss", "content": 0.0022532641887664795, "timestamp": "2025-09-10 02:20:42.368483", "step": 2295, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:20:42.398792", "step": 2295, "epoch": 1 }, { "type": "loss", "content": 0.0038333218544721603, "timestamp": "2025-09-10 02:20:42.424245", "step": 2296, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 144 ], "flops": 4271696270016 }, "timestamp": "2025-09-10 02:20:42.455186", "step": 2296, "epoch": 1 }, { "type": "loss", "content": 0.014495732262730598, "timestamp": "2025-09-10 02:20:42.457421", "step": 2297, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:20:42.488427", "step": 2297, "epoch": 1 }, { "type": "loss", "content": 0.017322422936558723, "timestamp": "2025-09-10 02:20:42.495776", "step": 2298, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:20:42.526511", "step": 2298, "epoch": 1 }, { "type": "loss", "content": 0.0031519909389317036, "timestamp": "2025-09-10 02:20:42.530996", "step": 2299, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:20:42.562167", "step": 2299, "epoch": 1 }, { "type": "loss", "content": 0.0060340953059494495, "timestamp": "2025-09-10 02:20:42.593565", "step": 2300, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:20:42.624703", "step": 2300, "epoch": 1 }, { "type": "loss", "content": 0.0017768697580322623, "timestamp": "2025-09-10 02:20:42.626635", "step": 2301, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:20:42.657184", "step": 2301, "epoch": 1 }, { "type": "loss", "content": 0.002710967091843486, "timestamp": "2025-09-10 02:20:42.661251", "step": 2302, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:20:42.692441", "step": 2302, "epoch": 1 }, { "type": "loss", "content": 0.002997096860781312, "timestamp": "2025-09-10 02:20:42.703313", "step": 2303, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:20:42.737922", "step": 2303, "epoch": 1 }, { "type": "loss", "content": 0.0036649901885539293, "timestamp": "2025-09-10 02:20:42.772214", "step": 2304, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:20:42.802806", "step": 2304, "epoch": 1 }, { "type": "loss", "content": 0.01135755330324173, "timestamp": "2025-09-10 02:20:42.807834", "step": 2305, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-10 02:20:42.846726", "step": 2305, "epoch": 1 }, { "type": "loss", "content": 0.005223255138844252, "timestamp": "2025-09-10 02:20:42.862333", "step": 2306, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:20:42.894696", "step": 2306, "epoch": 1 }, { "type": "loss", "content": 0.007239340338855982, "timestamp": "2025-09-10 02:20:42.902033", "step": 2307, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 480 ], "flops": 14238374516352 }, "timestamp": "2025-09-10 02:20:42.944257", "step": 2307, "epoch": 1 }, { "type": "loss", "content": 0.017995523288846016, "timestamp": "2025-09-10 02:20:42.982439", "step": 2308, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:20:43.013889", "step": 2308, "epoch": 1 }, { "type": "loss", "content": 0.008536996319890022, "timestamp": "2025-09-10 02:20:43.018208", "step": 2309, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:20:43.048443", "step": 2309, "epoch": 1 }, { "type": "loss", "content": 0.028488274663686752, "timestamp": "2025-09-10 02:20:43.052782", "step": 2310, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:20:43.083593", "step": 2310, "epoch": 1 }, { "type": "loss", "content": 0.04561712220311165, "timestamp": "2025-09-10 02:20:43.088056", "step": 2311, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:20:43.119831", "step": 2311, "epoch": 1 }, { "type": "loss", "content": 0.012657607905566692, "timestamp": "2025-09-10 02:20:43.147693", "step": 2312, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:20:43.177914", "step": 2312, "epoch": 1 }, { "type": "loss", "content": 0.010057737119495869, "timestamp": "2025-09-10 02:20:43.182495", "step": 2313, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:20:43.215199", "step": 2313, "epoch": 1 }, { "type": "loss", "content": 0.019854340702295303, "timestamp": "2025-09-10 02:20:43.226193", "step": 2314, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:20:43.259025", "step": 2314, "epoch": 1 }, { "type": "loss", "content": 0.014225935563445091, "timestamp": "2025-09-10 02:20:43.269294", "step": 2315, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:20:43.303944", "step": 2315, "epoch": 1 }, { "type": "loss", "content": 0.0027752441819757223, "timestamp": "2025-09-10 02:20:43.331839", "step": 2316, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 128 ], "flops": 3797092544000 }, "timestamp": "2025-09-10 02:20:43.362643", "step": 2316, "epoch": 1 }, { "type": "loss", "content": 0.006470884662121534, "timestamp": "2025-09-10 02:20:43.365156", "step": 2317, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:20:43.396458", "step": 2317, "epoch": 1 }, { "type": "loss", "content": 0.00640989001840353, "timestamp": "2025-09-10 02:20:43.400569", "step": 2318, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:20:43.432483", "step": 2318, "epoch": 1 }, { "type": "loss", "content": 0.01300732046365738, "timestamp": "2025-09-10 02:20:43.445053", "step": 2319, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:20:43.475866", "step": 2319, "epoch": 1 }, { "type": "loss", "content": 0.005316443741321564, "timestamp": "2025-09-10 02:20:43.499172", "step": 2320, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:20:43.532720", "step": 2320, "epoch": 1 }, { "type": "loss", "content": 0.014593763276934624, "timestamp": "2025-09-10 02:20:43.536478", "step": 2321, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:20:43.570893", "step": 2321, "epoch": 1 }, { "type": "loss", "content": 0.02588742971420288, "timestamp": "2025-09-10 02:20:43.584597", "step": 2322, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:20:43.620409", "step": 2322, "epoch": 1 }, { "type": "loss", "content": 0.002280434826388955, "timestamp": "2025-09-10 02:20:43.634093", "step": 2323, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:20:43.665408", "step": 2323, "epoch": 1 }, { "type": "loss", "content": 0.011547980830073357, "timestamp": "2025-09-10 02:20:43.692821", "step": 2324, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:20:43.725615", "step": 2324, "epoch": 1 }, { "type": "loss", "content": 0.019922899082303047, "timestamp": "2025-09-10 02:20:43.733524", "step": 2325, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:20:43.764663", "step": 2325, "epoch": 1 }, { "type": "loss", "content": 0.0015016960678622127, "timestamp": "2025-09-10 02:20:43.774636", "step": 2326, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-10 02:20:43.809550", "step": 2326, "epoch": 1 }, { "type": "loss", "content": 0.019193019717931747, "timestamp": "2025-09-10 02:20:43.823553", "step": 2327, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 464 ], "flops": 13763770790336 }, "timestamp": "2025-09-10 02:20:43.864804", "step": 2327, "epoch": 1 }, { "type": "loss", "content": 0.009289233013987541, "timestamp": "2025-09-10 02:20:43.902812", "step": 2328, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:20:43.937387", "step": 2328, "epoch": 1 }, { "type": "loss", "content": 0.0029966922011226416, "timestamp": "2025-09-10 02:20:43.939136", "step": 2329, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:20:43.969998", "step": 2329, "epoch": 1 }, { "type": "loss", "content": 0.011959855444729328, "timestamp": "2025-09-10 02:20:43.974267", "step": 2330, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:20:44.008145", "step": 2330, "epoch": 1 }, { "type": "loss", "content": 0.0025715772062540054, "timestamp": "2025-09-10 02:20:44.021546", "step": 2331, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:20:44.053744", "step": 2331, "epoch": 1 }, { "type": "loss", "content": 0.007740771863609552, "timestamp": "2025-09-10 02:20:44.083824", "step": 2332, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:20:44.116498", "step": 2332, "epoch": 1 }, { "type": "loss", "content": 0.008812974207103252, "timestamp": "2025-09-10 02:20:44.120577", "step": 2333, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:20:44.153115", "step": 2333, "epoch": 1 }, { "type": "loss", "content": 0.009404649026691914, "timestamp": "2025-09-10 02:20:44.164292", "step": 2334, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:20:44.195361", "step": 2334, "epoch": 1 }, { "type": "loss", "content": 0.002871322212740779, "timestamp": "2025-09-10 02:20:44.199476", "step": 2335, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:20:44.231014", "step": 2335, "epoch": 1 }, { "type": "loss", "content": 0.00894598476588726, "timestamp": "2025-09-10 02:20:44.259219", "step": 2336, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:20:44.290134", "step": 2336, "epoch": 1 }, { "type": "loss", "content": 0.0016351427184417844, "timestamp": "2025-09-10 02:20:44.294752", "step": 2337, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 464 ], "flops": 13763770790336 }, "timestamp": "2025-09-10 02:20:44.336635", "step": 2337, "epoch": 1 }, { "type": "loss", "content": 0.03164186701178551, "timestamp": "2025-09-10 02:20:44.353688", "step": 2338, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:20:44.385870", "step": 2338, "epoch": 1 }, { "type": "loss", "content": 0.002591783879324794, "timestamp": "2025-09-10 02:20:44.392350", "step": 2339, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:20:44.423985", "step": 2339, "epoch": 1 }, { "type": "loss", "content": 0.006208306644111872, "timestamp": "2025-09-10 02:20:44.452230", "step": 2340, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:20:44.483661", "step": 2340, "epoch": 1 }, { "type": "loss", "content": 0.034006860107183456, "timestamp": "2025-09-10 02:20:44.496386", "step": 2341, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 448 ], "flops": 13289167064320 }, "timestamp": "2025-09-10 02:20:44.536358", "step": 2341, "epoch": 1 }, { "type": "loss", "content": 0.005256416741758585, "timestamp": "2025-09-10 02:20:44.552804", "step": 2342, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:20:44.584822", "step": 2342, "epoch": 1 }, { "type": "loss", "content": 0.009249741211533546, "timestamp": "2025-09-10 02:20:44.591960", "step": 2343, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:20:44.622800", "step": 2343, "epoch": 1 }, { "type": "loss", "content": 0.007146596908569336, "timestamp": "2025-09-10 02:20:44.651295", "step": 2344, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:20:44.682659", "step": 2344, "epoch": 1 }, { "type": "loss", "content": 0.0049681952223181725, "timestamp": "2025-09-10 02:20:44.688005", "step": 2345, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:20:44.718988", "step": 2345, "epoch": 1 }, { "type": "loss", "content": 0.018689943477511406, "timestamp": "2025-09-10 02:20:44.726369", "step": 2346, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:20:44.757158", "step": 2346, "epoch": 1 }, { "type": "loss", "content": 0.022296303883194923, "timestamp": "2025-09-10 02:20:44.764741", "step": 2347, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:20:44.796511", "step": 2347, "epoch": 1 }, { "type": "loss", "content": 0.0007350871455855668, "timestamp": "2025-09-10 02:20:44.828415", "step": 2348, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:20:44.859942", "step": 2348, "epoch": 1 }, { "type": "loss", "content": 0.0011058534728363156, "timestamp": "2025-09-10 02:20:44.862115", "step": 2349, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:20:44.896598", "step": 2349, "epoch": 1 }, { "type": "loss", "content": 0.027341008186340332, "timestamp": "2025-09-10 02:20:44.910312", "step": 2350, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:20:44.942224", "step": 2350, "epoch": 1 }, { "type": "loss", "content": 0.007282934617251158, "timestamp": "2025-09-10 02:20:44.949678", "step": 2351, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 656 ], "flops": 19459015502528 }, "timestamp": "2025-09-10 02:20:45.004125", "step": 2351, "epoch": 1 }, { "type": "loss", "content": 0.00618086289614439, "timestamp": "2025-09-10 02:20:45.048452", "step": 2352, "epoch": 1 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 736 ], "batch_size": 8, "flops": 14554433988352 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 480 ], "batch_size": 8, "flops": 9492022189824 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 544 ], "batch_size": 8, "flops": 10757625139456 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 624 ], "batch_size": 8, "flops": 12339628826496 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 576 ], "batch_size": 8, "flops": 11390426614272 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 432 ], "batch_size": 8, "flops": 8542819977600 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 432 ], "batch_size": 8, "flops": 8542819977600 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 128 ], "batch_size": 8, "flops": 2531205966848 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 544 ], "batch_size": 8, "flops": 10757625139456 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 656 ], "batch_size": 8, "flops": 12972430301312 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 128 ], "batch_size": 8, "flops": 2531205966848 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 464 ], "batch_size": 8, "flops": 9175621452416 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 560 ], "batch_size": 8, "flops": 11074025876864 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 576 ], "batch_size": 8, "flops": 11390426614272 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 1168 ], "batch_size": 8, "flops": 23097253898368 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 640 ], "batch_size": 8, "flops": 12656029563904 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 6, 208 ], "batch_size": 8, "flops": 4113209653888 } ], "timestamp": "2025-09-10 02:20:55.262898", "step": 2352, "epoch": 1 }, { "type": "pplx", "content": 18824440.007556766, "timestamp": "2025-09-10 02:20:55.266064", "step": 2352, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:20:55.295642", "step": 2352, "epoch": 1 }, { "type": "loss", "content": 0.012922325171530247, "timestamp": "2025-09-10 02:20:55.302810", "step": 2353, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:20:55.333710", "step": 2353, "epoch": 1 }, { "type": "loss", "content": 0.0021893230732530355, "timestamp": "2025-09-10 02:20:55.341004", "step": 2354, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:20:55.371723", "step": 2354, "epoch": 1 }, { "type": "loss", "content": 0.0217142216861248, "timestamp": "2025-09-10 02:20:55.375719", "step": 2355, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 528 ], "flops": 15662185694400 }, "timestamp": "2025-09-10 02:20:55.422689", "step": 2355, "epoch": 1 }, { "type": "loss", "content": 0.0012116653379052877, "timestamp": "2025-09-10 02:20:55.462794", "step": 2356, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 3, 224 ], "flops": 4983601869792 }, "timestamp": "2025-09-10 02:20:55.515007", "step": 2356, "epoch": 1 }, { "type": "loss", "content": 0.0030051611829549074, "timestamp": "2025-09-10 02:20:55.517186", "step": 2357, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:20:55.565649", "step": 2357, "epoch": 2 }, { "type": "loss", "content": 0.001868675695732236, "timestamp": "2025-09-10 02:20:55.570508", "step": 2358, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:20:55.601669", "step": 2358, "epoch": 2 }, { "type": "loss", "content": 0.006056786980479956, "timestamp": "2025-09-10 02:20:55.609223", "step": 2359, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:20:55.640144", "step": 2359, "epoch": 2 }, { "type": "loss", "content": 0.01372221577912569, "timestamp": "2025-09-10 02:20:55.668003", "step": 2360, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:20:55.698857", "step": 2360, "epoch": 2 }, { "type": "loss", "content": 0.0012454865500330925, "timestamp": "2025-09-10 02:20:55.703516", "step": 2361, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:20:55.734399", "step": 2361, "epoch": 2 }, { "type": "loss", "content": 0.0016425231005996466, "timestamp": "2025-09-10 02:20:55.741488", "step": 2362, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:20:55.771791", "step": 2362, "epoch": 2 }, { "type": "loss", "content": 0.011411800980567932, "timestamp": "2025-09-10 02:20:55.775839", "step": 2363, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:20:55.807165", "step": 2363, "epoch": 2 }, { "type": "loss", "content": 0.035575442016124725, "timestamp": "2025-09-10 02:20:55.835049", "step": 2364, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:20:55.865716", "step": 2364, "epoch": 2 }, { "type": "loss", "content": 0.028888003900647163, "timestamp": "2025-09-10 02:20:55.870861", "step": 2365, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:20:55.902237", "step": 2365, "epoch": 2 }, { "type": "loss", "content": 0.013811533339321613, "timestamp": "2025-09-10 02:20:55.912491", "step": 2366, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:20:55.943287", "step": 2366, "epoch": 2 }, { "type": "loss", "content": 0.001658704481087625, "timestamp": "2025-09-10 02:20:55.951124", "step": 2367, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:20:55.982180", "step": 2367, "epoch": 2 }, { "type": "loss", "content": 0.007761240005493164, "timestamp": "2025-09-10 02:20:56.009977", "step": 2368, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:20:56.041483", "step": 2368, "epoch": 2 }, { "type": "loss", "content": 0.019760314375162125, "timestamp": "2025-09-10 02:20:56.045864", "step": 2369, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:20:56.076662", "step": 2369, "epoch": 2 }, { "type": "loss", "content": 0.0020639884751290083, "timestamp": "2025-09-10 02:20:56.081232", "step": 2370, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:20:56.114724", "step": 2370, "epoch": 2 }, { "type": "loss", "content": 0.0026808734983205795, "timestamp": "2025-09-10 02:20:56.128079", "step": 2371, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:20:56.159084", "step": 2371, "epoch": 2 }, { "type": "loss", "content": 0.0017675552517175674, "timestamp": "2025-09-10 02:20:56.184457", "step": 2372, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:20:56.215102", "step": 2372, "epoch": 2 }, { "type": "loss", "content": 0.0011209690710529685, "timestamp": "2025-09-10 02:20:56.217329", "step": 2373, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:20:56.248801", "step": 2373, "epoch": 2 }, { "type": "loss", "content": 0.01740814931690693, "timestamp": "2025-09-10 02:20:56.261180", "step": 2374, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:20:56.293507", "step": 2374, "epoch": 2 }, { "type": "loss", "content": 0.014776641502976418, "timestamp": "2025-09-10 02:20:56.298010", "step": 2375, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:20:56.329056", "step": 2375, "epoch": 2 }, { "type": "loss", "content": 0.006744783371686935, "timestamp": "2025-09-10 02:20:56.360277", "step": 2376, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:20:56.391300", "step": 2376, "epoch": 2 }, { "type": "loss", "content": 0.004561256151646376, "timestamp": "2025-09-10 02:20:56.393677", "step": 2377, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:20:56.425799", "step": 2377, "epoch": 2 }, { "type": "loss", "content": 0.02400498278439045, "timestamp": "2025-09-10 02:20:56.435986", "step": 2378, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:20:56.467356", "step": 2378, "epoch": 2 }, { "type": "loss", "content": 0.0026017860509455204, "timestamp": "2025-09-10 02:20:56.471485", "step": 2379, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:20:56.502242", "step": 2379, "epoch": 2 }, { "type": "loss", "content": 0.0014477769145742059, "timestamp": "2025-09-10 02:20:56.530026", "step": 2380, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:20:56.562359", "step": 2380, "epoch": 2 }, { "type": "loss", "content": 0.0038361712358891964, "timestamp": "2025-09-10 02:20:56.567242", "step": 2381, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:20:56.599463", "step": 2381, "epoch": 2 }, { "type": "loss", "content": 0.015629353001713753, "timestamp": "2025-09-10 02:20:56.606433", "step": 2382, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:20:56.639786", "step": 2382, "epoch": 2 }, { "type": "loss", "content": 0.0013419255847111344, "timestamp": "2025-09-10 02:20:56.647012", "step": 2383, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:20:56.677425", "step": 2383, "epoch": 2 }, { "type": "loss", "content": 0.012586474418640137, "timestamp": "2025-09-10 02:20:56.705245", "step": 2384, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:20:56.736712", "step": 2384, "epoch": 2 }, { "type": "loss", "content": 0.011415432207286358, "timestamp": "2025-09-10 02:20:56.739923", "step": 2385, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:20:56.773942", "step": 2385, "epoch": 2 }, { "type": "loss", "content": 0.03539576753973961, "timestamp": "2025-09-10 02:20:56.781014", "step": 2386, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:20:56.815401", "step": 2386, "epoch": 2 }, { "type": "loss", "content": 0.0012404642766341567, "timestamp": "2025-09-10 02:20:56.829064", "step": 2387, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:20:56.860093", "step": 2387, "epoch": 2 }, { "type": "loss", "content": 0.0017915985081344843, "timestamp": "2025-09-10 02:20:56.887618", "step": 2388, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:20:56.919137", "step": 2388, "epoch": 2 }, { "type": "loss", "content": 0.01956382766366005, "timestamp": "2025-09-10 02:20:56.923666", "step": 2389, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:20:56.955340", "step": 2389, "epoch": 2 }, { "type": "loss", "content": 0.012075236067175865, "timestamp": "2025-09-10 02:20:56.959135", "step": 2390, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:20:56.990995", "step": 2390, "epoch": 2 }, { "type": "loss", "content": 0.015462463721632957, "timestamp": "2025-09-10 02:20:56.998684", "step": 2391, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:20:57.029491", "step": 2391, "epoch": 2 }, { "type": "loss", "content": 0.03371422737836838, "timestamp": "2025-09-10 02:20:57.060357", "step": 2392, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:20:57.092148", "step": 2392, "epoch": 2 }, { "type": "loss", "content": 0.00984213501214981, "timestamp": "2025-09-10 02:20:57.094323", "step": 2393, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:20:57.137137", "step": 2393, "epoch": 2 }, { "type": "loss", "content": 0.0032658951822668314, "timestamp": "2025-09-10 02:20:57.143803", "step": 2394, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 144 ], "flops": 4271696270016 }, "timestamp": "2025-09-10 02:20:57.176805", "step": 2394, "epoch": 2 }, { "type": "loss", "content": 0.00632342416793108, "timestamp": "2025-09-10 02:20:57.180620", "step": 2395, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:20:57.211466", "step": 2395, "epoch": 2 }, { "type": "loss", "content": 0.027958959341049194, "timestamp": "2025-09-10 02:20:57.236629", "step": 2396, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:20:57.268322", "step": 2396, "epoch": 2 }, { "type": "loss", "content": 0.007394559681415558, "timestamp": "2025-09-10 02:20:57.272723", "step": 2397, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:20:57.305596", "step": 2397, "epoch": 2 }, { "type": "loss", "content": 0.04190784692764282, "timestamp": "2025-09-10 02:20:57.312749", "step": 2398, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:20:57.344112", "step": 2398, "epoch": 2 }, { "type": "loss", "content": 0.006203221622854471, "timestamp": "2025-09-10 02:20:57.348174", "step": 2399, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:20:57.381966", "step": 2399, "epoch": 2 }, { "type": "loss", "content": 0.003813191084191203, "timestamp": "2025-09-10 02:20:57.416214", "step": 2400, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:20:57.449093", "step": 2400, "epoch": 2 }, { "type": "loss", "content": 0.0005948929465375841, "timestamp": "2025-09-10 02:20:57.453635", "step": 2401, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:20:57.484630", "step": 2401, "epoch": 2 }, { "type": "loss", "content": 0.04219771549105644, "timestamp": "2025-09-10 02:20:57.488654", "step": 2402, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:20:57.519922", "step": 2402, "epoch": 2 }, { "type": "loss", "content": 0.006709757260978222, "timestamp": "2025-09-10 02:20:57.526953", "step": 2403, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:20:57.557872", "step": 2403, "epoch": 2 }, { "type": "loss", "content": 0.0066467165015637875, "timestamp": "2025-09-10 02:20:57.588893", "step": 2404, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:20:57.620246", "step": 2404, "epoch": 2 }, { "type": "loss", "content": 0.0014069050084799528, "timestamp": "2025-09-10 02:20:57.624577", "step": 2405, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:20:57.656236", "step": 2405, "epoch": 2 }, { "type": "loss", "content": 0.022929934784770012, "timestamp": "2025-09-10 02:20:57.666779", "step": 2406, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:20:57.698496", "step": 2406, "epoch": 2 }, { "type": "loss", "content": 0.03234897926449776, "timestamp": "2025-09-10 02:20:57.709451", "step": 2407, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:20:57.742453", "step": 2407, "epoch": 2 }, { "type": "loss", "content": 0.0065283398143947124, "timestamp": "2025-09-10 02:20:57.766252", "step": 2408, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 928 ], "flops": 27527278844800 }, "timestamp": "2025-09-10 02:20:57.838691", "step": 2408, "epoch": 2 }, { "type": "loss", "content": 0.014821560122072697, "timestamp": "2025-09-10 02:20:57.870500", "step": 2409, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:20:57.904843", "step": 2409, "epoch": 2 }, { "type": "loss", "content": 0.003778102109208703, "timestamp": "2025-09-10 02:20:57.909324", "step": 2410, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:20:57.943833", "step": 2410, "epoch": 2 }, { "type": "loss", "content": 0.012805613689124584, "timestamp": "2025-09-10 02:20:57.954035", "step": 2411, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:20:57.985528", "step": 2411, "epoch": 2 }, { "type": "loss", "content": 0.007993345148861408, "timestamp": "2025-09-10 02:20:58.013838", "step": 2412, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-10 02:20:58.050742", "step": 2412, "epoch": 2 }, { "type": "loss", "content": 0.006639838218688965, "timestamp": "2025-09-10 02:20:58.065968", "step": 2413, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 144 ], "flops": 4271696270016 }, "timestamp": "2025-09-10 02:20:58.096623", "step": 2413, "epoch": 2 }, { "type": "loss", "content": 0.00533502921462059, "timestamp": "2025-09-10 02:20:58.099109", "step": 2414, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:20:58.129457", "step": 2414, "epoch": 2 }, { "type": "loss", "content": 0.02578054927289486, "timestamp": "2025-09-10 02:20:58.136534", "step": 2415, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:20:58.167819", "step": 2415, "epoch": 2 }, { "type": "loss", "content": 0.007064398843795061, "timestamp": "2025-09-10 02:20:58.198746", "step": 2416, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:20:58.228972", "step": 2416, "epoch": 2 }, { "type": "loss", "content": 0.01666208915412426, "timestamp": "2025-09-10 02:20:58.234269", "step": 2417, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:20:58.266396", "step": 2417, "epoch": 2 }, { "type": "loss", "content": 0.036728501319885254, "timestamp": "2025-09-10 02:20:58.278539", "step": 2418, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:20:58.314188", "step": 2418, "epoch": 2 }, { "type": "loss", "content": 0.011314035393297672, "timestamp": "2025-09-10 02:20:58.320791", "step": 2419, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:20:58.352422", "step": 2419, "epoch": 2 }, { "type": "loss", "content": 0.004533576779067516, "timestamp": "2025-09-10 02:20:58.380797", "step": 2420, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:20:58.412606", "step": 2420, "epoch": 2 }, { "type": "loss", "content": 0.005247695837169886, "timestamp": "2025-09-10 02:20:58.417012", "step": 2421, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-10 02:20:58.456744", "step": 2421, "epoch": 2 }, { "type": "loss", "content": 0.019029613584280014, "timestamp": "2025-09-10 02:20:58.472721", "step": 2422, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:20:58.504313", "step": 2422, "epoch": 2 }, { "type": "loss", "content": 0.012553774751722813, "timestamp": "2025-09-10 02:20:58.511447", "step": 2423, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:20:58.541783", "step": 2423, "epoch": 2 }, { "type": "loss", "content": 0.005390453618019819, "timestamp": "2025-09-10 02:20:58.566917", "step": 2424, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:20:58.602231", "step": 2424, "epoch": 2 }, { "type": "loss", "content": 0.02095671556890011, "timestamp": "2025-09-10 02:20:58.604478", "step": 2425, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:20:58.634822", "step": 2425, "epoch": 2 }, { "type": "loss", "content": 0.011055312119424343, "timestamp": "2025-09-10 02:20:58.642553", "step": 2426, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-10 02:20:58.682729", "step": 2426, "epoch": 2 }, { "type": "loss", "content": 0.010104840621352196, "timestamp": "2025-09-10 02:20:58.698595", "step": 2427, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:20:58.730437", "step": 2427, "epoch": 2 }, { "type": "loss", "content": 0.00237080454826355, "timestamp": "2025-09-10 02:20:58.758616", "step": 2428, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:20:58.790689", "step": 2428, "epoch": 2 }, { "type": "loss", "content": 0.009992515668272972, "timestamp": "2025-09-10 02:20:58.800696", "step": 2429, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:20:58.833800", "step": 2429, "epoch": 2 }, { "type": "loss", "content": 0.029752474278211594, "timestamp": "2025-09-10 02:20:58.840812", "step": 2430, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:20:58.875836", "step": 2430, "epoch": 2 }, { "type": "loss", "content": 0.0017465923447161913, "timestamp": "2025-09-10 02:20:58.882412", "step": 2431, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:20:58.913422", "step": 2431, "epoch": 2 }, { "type": "loss", "content": 0.02968502603471279, "timestamp": "2025-09-10 02:20:58.945044", "step": 2432, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-10 02:20:58.982449", "step": 2432, "epoch": 2 }, { "type": "loss", "content": 0.007202841341495514, "timestamp": "2025-09-10 02:20:58.997633", "step": 2433, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:20:59.036354", "step": 2433, "epoch": 2 }, { "type": "loss", "content": 0.028496667742729187, "timestamp": "2025-09-10 02:20:59.043146", "step": 2434, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:20:59.078570", "step": 2434, "epoch": 2 }, { "type": "loss", "content": 0.010356120765209198, "timestamp": "2025-09-10 02:20:59.082482", "step": 2435, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:20:59.118399", "step": 2435, "epoch": 2 }, { "type": "loss", "content": 0.010988089255988598, "timestamp": "2025-09-10 02:20:59.152646", "step": 2436, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-10 02:20:59.189861", "step": 2436, "epoch": 2 }, { "type": "loss", "content": 0.013273806311190128, "timestamp": "2025-09-10 02:20:59.204988", "step": 2437, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:20:59.240005", "step": 2437, "epoch": 2 }, { "type": "loss", "content": 0.0162824559956789, "timestamp": "2025-09-10 02:20:59.251543", "step": 2438, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:20:59.291319", "step": 2438, "epoch": 2 }, { "type": "loss", "content": 0.015538596548140049, "timestamp": "2025-09-10 02:20:59.302867", "step": 2439, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:20:59.336710", "step": 2439, "epoch": 2 }, { "type": "loss", "content": 0.024137185886502266, "timestamp": "2025-09-10 02:20:59.367489", "step": 2440, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:20:59.399054", "step": 2440, "epoch": 2 }, { "type": "loss", "content": 0.012249810621142387, "timestamp": "2025-09-10 02:20:59.403560", "step": 2441, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:20:59.436669", "step": 2441, "epoch": 2 }, { "type": "loss", "content": 0.014839425683021545, "timestamp": "2025-09-10 02:20:59.443398", "step": 2442, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:20:59.481062", "step": 2442, "epoch": 2 }, { "type": "loss", "content": 0.024386601522564888, "timestamp": "2025-09-10 02:20:59.494734", "step": 2443, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:20:59.526751", "step": 2443, "epoch": 2 }, { "type": "loss", "content": 0.007399399299174547, "timestamp": "2025-09-10 02:20:59.551681", "step": 2444, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:20:59.584018", "step": 2444, "epoch": 2 }, { "type": "loss", "content": 0.0049104467034339905, "timestamp": "2025-09-10 02:20:59.588711", "step": 2445, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 496 ], "flops": 14712978242368 }, "timestamp": "2025-09-10 02:20:59.632271", "step": 2445, "epoch": 2 }, { "type": "loss", "content": 0.0064061665907502174, "timestamp": "2025-09-10 02:20:59.649910", "step": 2446, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:20:59.683185", "step": 2446, "epoch": 2 }, { "type": "loss", "content": 0.026691416278481483, "timestamp": "2025-09-10 02:20:59.689889", "step": 2447, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:20:59.724090", "step": 2447, "epoch": 2 }, { "type": "loss", "content": 0.004400145262479782, "timestamp": "2025-09-10 02:20:59.751749", "step": 2448, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:20:59.786408", "step": 2448, "epoch": 2 }, { "type": "loss", "content": 0.002782547613605857, "timestamp": "2025-09-10 02:20:59.798614", "step": 2449, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:20:59.830361", "step": 2449, "epoch": 2 }, { "type": "loss", "content": 0.030095087364315987, "timestamp": "2025-09-10 02:20:59.834411", "step": 2450, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:20:59.867471", "step": 2450, "epoch": 2 }, { "type": "loss", "content": 0.004379054065793753, "timestamp": "2025-09-10 02:20:59.874120", "step": 2451, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 480 ], "flops": 14238374516352 }, "timestamp": "2025-09-10 02:20:59.916375", "step": 2451, "epoch": 2 }, { "type": "loss", "content": 0.009237154386937618, "timestamp": "2025-09-10 02:20:59.954598", "step": 2452, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:20:59.986634", "step": 2452, "epoch": 2 }, { "type": "loss", "content": 0.013287164270877838, "timestamp": "2025-09-10 02:20:59.990682", "step": 2453, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:21:00.022911", "step": 2453, "epoch": 2 }, { "type": "loss", "content": 0.0038054410833865404, "timestamp": "2025-09-10 02:21:00.030203", "step": 2454, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:21:00.061572", "step": 2454, "epoch": 2 }, { "type": "loss", "content": 0.006211976520717144, "timestamp": "2025-09-10 02:21:00.065972", "step": 2455, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:21:00.098272", "step": 2455, "epoch": 2 }, { "type": "loss", "content": 0.011952430941164494, "timestamp": "2025-09-10 02:21:00.123510", "step": 2456, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:21:00.158649", "step": 2456, "epoch": 2 }, { "type": "loss", "content": 0.012498662807047367, "timestamp": "2025-09-10 02:21:00.163062", "step": 2457, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:21:00.196415", "step": 2457, "epoch": 2 }, { "type": "loss", "content": 0.0178080927580595, "timestamp": "2025-09-10 02:21:00.198975", "step": 2458, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:21:00.230365", "step": 2458, "epoch": 2 }, { "type": "loss", "content": 0.017546426504850388, "timestamp": "2025-09-10 02:21:00.237228", "step": 2459, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:21:00.268659", "step": 2459, "epoch": 2 }, { "type": "loss", "content": 0.023515610024333, "timestamp": "2025-09-10 02:21:00.297119", "step": 2460, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:21:00.329743", "step": 2460, "epoch": 2 }, { "type": "loss", "content": 0.0053445808589458466, "timestamp": "2025-09-10 02:21:00.338117", "step": 2461, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:21:00.371739", "step": 2461, "epoch": 2 }, { "type": "loss", "content": 0.002676423406228423, "timestamp": "2025-09-10 02:21:00.382019", "step": 2462, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:21:00.414165", "step": 2462, "epoch": 2 }, { "type": "loss", "content": 0.004749422427266836, "timestamp": "2025-09-10 02:21:00.421359", "step": 2463, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:21:00.453478", "step": 2463, "epoch": 2 }, { "type": "loss", "content": 0.025678569450974464, "timestamp": "2025-09-10 02:21:00.480924", "step": 2464, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:21:00.513252", "step": 2464, "epoch": 2 }, { "type": "loss", "content": 0.027631347998976707, "timestamp": "2025-09-10 02:21:00.518313", "step": 2465, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:21:00.551116", "step": 2465, "epoch": 2 }, { "type": "loss", "content": 0.007423713803291321, "timestamp": "2025-09-10 02:21:00.560967", "step": 2466, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:21:00.592568", "step": 2466, "epoch": 2 }, { "type": "loss", "content": 0.0034356131218373775, "timestamp": "2025-09-10 02:21:00.599184", "step": 2467, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:21:00.631300", "step": 2467, "epoch": 2 }, { "type": "loss", "content": 0.0028629249427467585, "timestamp": "2025-09-10 02:21:00.664453", "step": 2468, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:21:00.697219", "step": 2468, "epoch": 2 }, { "type": "loss", "content": 0.003994452767074108, "timestamp": "2025-09-10 02:21:00.702035", "step": 2469, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:21:00.733794", "step": 2469, "epoch": 2 }, { "type": "loss", "content": 0.007717865519225597, "timestamp": "2025-09-10 02:21:00.740914", "step": 2470, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:21:00.773340", "step": 2470, "epoch": 2 }, { "type": "loss", "content": 0.008052381686866283, "timestamp": "2025-09-10 02:21:00.779842", "step": 2471, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:21:00.811917", "step": 2471, "epoch": 2 }, { "type": "loss", "content": 0.023543791845440865, "timestamp": "2025-09-10 02:21:00.839978", "step": 2472, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:21:00.872023", "step": 2472, "epoch": 2 }, { "type": "loss", "content": 0.004500131588429213, "timestamp": "2025-09-10 02:21:00.876183", "step": 2473, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:21:00.910614", "step": 2473, "epoch": 2 }, { "type": "loss", "content": 0.01701648160815239, "timestamp": "2025-09-10 02:21:00.923993", "step": 2474, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:21:00.963743", "step": 2474, "epoch": 2 }, { "type": "loss", "content": 0.04145175218582153, "timestamp": "2025-09-10 02:21:00.970404", "step": 2475, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:21:01.003069", "step": 2475, "epoch": 2 }, { "type": "loss", "content": 0.027746308594942093, "timestamp": "2025-09-10 02:21:01.036297", "step": 2476, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:21:01.069237", "step": 2476, "epoch": 2 }, { "type": "loss", "content": 0.0057420432567596436, "timestamp": "2025-09-10 02:21:01.081920", "step": 2477, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:21:01.114764", "step": 2477, "epoch": 2 }, { "type": "loss", "content": 0.024375727400183678, "timestamp": "2025-09-10 02:21:01.126557", "step": 2478, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:21:01.159373", "step": 2478, "epoch": 2 }, { "type": "loss", "content": 0.004905619192868471, "timestamp": "2025-09-10 02:21:01.169020", "step": 2479, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:21:01.200895", "step": 2479, "epoch": 2 }, { "type": "loss", "content": 0.014483463950455189, "timestamp": "2025-09-10 02:21:01.228375", "step": 2480, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:21:01.261928", "step": 2480, "epoch": 2 }, { "type": "loss", "content": 0.015179364010691643, "timestamp": "2025-09-10 02:21:01.269896", "step": 2481, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:21:01.302092", "step": 2481, "epoch": 2 }, { "type": "loss", "content": 0.010475664399564266, "timestamp": "2025-09-10 02:21:01.304553", "step": 2482, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:21:01.335929", "step": 2482, "epoch": 2 }, { "type": "loss", "content": 0.008311014622449875, "timestamp": "2025-09-10 02:21:01.345565", "step": 2483, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-10 02:21:01.383373", "step": 2483, "epoch": 2 }, { "type": "loss", "content": 0.018926413729786873, "timestamp": "2025-09-10 02:21:01.418320", "step": 2484, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 144 ], "flops": 4271696270016 }, "timestamp": "2025-09-10 02:21:01.453949", "step": 2484, "epoch": 2 }, { "type": "loss", "content": 0.008039912208914757, "timestamp": "2025-09-10 02:21:01.457516", "step": 2485, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:21:01.494246", "step": 2485, "epoch": 2 }, { "type": "loss", "content": 0.004062869120389223, "timestamp": "2025-09-10 02:21:01.506816", "step": 2486, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 144 ], "flops": 4271696270016 }, "timestamp": "2025-09-10 02:21:01.539890", "step": 2486, "epoch": 2 }, { "type": "loss", "content": 0.016554275527596474, "timestamp": "2025-09-10 02:21:01.541912", "step": 2487, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:21:01.573571", "step": 2487, "epoch": 2 }, { "type": "loss", "content": 0.016327768564224243, "timestamp": "2025-09-10 02:21:01.601646", "step": 2488, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:21:01.633487", "step": 2488, "epoch": 2 }, { "type": "loss", "content": 0.0051765465177595615, "timestamp": "2025-09-10 02:21:01.635807", "step": 2489, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:21:01.672950", "step": 2489, "epoch": 2 }, { "type": "loss", "content": 0.0068465410731732845, "timestamp": "2025-09-10 02:21:01.680286", "step": 2490, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:21:01.713239", "step": 2490, "epoch": 2 }, { "type": "loss", "content": 0.01244097389280796, "timestamp": "2025-09-10 02:21:01.720561", "step": 2491, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:21:01.758847", "step": 2491, "epoch": 2 }, { "type": "loss", "content": 0.0072895921766757965, "timestamp": "2025-09-10 02:21:01.791378", "step": 2492, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:21:01.824943", "step": 2492, "epoch": 2 }, { "type": "loss", "content": 0.010245480574667454, "timestamp": "2025-09-10 02:21:01.829063", "step": 2493, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:21:01.861585", "step": 2493, "epoch": 2 }, { "type": "loss", "content": 0.00806692149490118, "timestamp": "2025-09-10 02:21:01.873213", "step": 2494, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:21:01.908408", "step": 2494, "epoch": 2 }, { "type": "loss", "content": 0.003785413922742009, "timestamp": "2025-09-10 02:21:01.914785", "step": 2495, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:21:01.950757", "step": 2495, "epoch": 2 }, { "type": "loss", "content": 0.003561714431270957, "timestamp": "2025-09-10 02:21:01.978198", "step": 2496, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:21:02.013614", "step": 2496, "epoch": 2 }, { "type": "loss", "content": 0.0025676547084003687, "timestamp": "2025-09-10 02:21:02.018682", "step": 2497, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:21:02.059458", "step": 2497, "epoch": 2 }, { "type": "loss", "content": 0.012827993370592594, "timestamp": "2025-09-10 02:21:02.073181", "step": 2498, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:21:02.112247", "step": 2498, "epoch": 2 }, { "type": "loss", "content": 0.010569563135504723, "timestamp": "2025-09-10 02:21:02.119588", "step": 2499, "epoch": 2 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 736 ], "batch_size": 8, "flops": 14554433988352 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 480 ], "batch_size": 8, "flops": 9492022189824 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 544 ], "batch_size": 8, "flops": 10757625139456 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 624 ], "batch_size": 8, "flops": 12339628826496 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 576 ], "batch_size": 8, "flops": 11390426614272 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 432 ], "batch_size": 8, "flops": 8542819977600 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 432 ], "batch_size": 8, "flops": 8542819977600 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 128 ], "batch_size": 8, "flops": 2531205966848 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 544 ], "batch_size": 8, "flops": 10757625139456 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 656 ], "batch_size": 8, "flops": 12972430301312 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 128 ], "batch_size": 8, "flops": 2531205966848 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 464 ], "batch_size": 8, "flops": 9175621452416 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 560 ], "batch_size": 8, "flops": 11074025876864 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 576 ], "batch_size": 8, "flops": 11390426614272 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 1168 ], "batch_size": 8, "flops": 23097253898368 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 640 ], "batch_size": 8, "flops": 12656029563904 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 6, 208 ], "batch_size": 8, "flops": 4113209653888 } ], "timestamp": "2025-09-10 02:21:12.914739", "step": 2499, "epoch": 2 }, { "type": "pplx", "content": 18230494.697521377, "timestamp": "2025-09-10 02:21:12.919042", "step": 2499, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:21:12.952692", "step": 2499, "epoch": 2 }, { "type": "loss", "content": 0.007842292077839375, "timestamp": "2025-09-10 02:21:12.979378", "step": 2500, "epoch": 2 }, { "type": "info", "content": "Checkpoint saved at step 2500", "timestamp": "2025-09-10 02:21:18.083563", "step": 2500, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:21:18.117148", "step": 2500, "epoch": 2 }, { "type": "loss", "content": 0.0012375351507216692, "timestamp": "2025-09-10 02:21:18.121456", "step": 2501, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:21:18.155236", "step": 2501, "epoch": 2 }, { "type": "loss", "content": 0.001796129741705954, "timestamp": "2025-09-10 02:21:18.164371", "step": 2502, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:21:18.199139", "step": 2502, "epoch": 2 }, { "type": "loss", "content": 0.01929536834359169, "timestamp": "2025-09-10 02:21:18.205478", "step": 2503, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:21:18.240242", "step": 2503, "epoch": 2 }, { "type": "loss", "content": 0.012172207236289978, "timestamp": "2025-09-10 02:21:18.270857", "step": 2504, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:21:18.313891", "step": 2504, "epoch": 2 }, { "type": "loss", "content": 0.012182426638901234, "timestamp": "2025-09-10 02:21:18.319073", "step": 2505, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:21:18.352236", "step": 2505, "epoch": 2 }, { "type": "loss", "content": 0.0019687467720359564, "timestamp": "2025-09-10 02:21:18.359205", "step": 2506, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:21:18.392271", "step": 2506, "epoch": 2 }, { "type": "loss", "content": 0.0035485646221786737, "timestamp": "2025-09-10 02:21:18.399195", "step": 2507, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-10 02:21:18.434049", "step": 2507, "epoch": 2 }, { "type": "loss", "content": 0.017159203067421913, "timestamp": "2025-09-10 02:21:18.468718", "step": 2508, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:21:18.505446", "step": 2508, "epoch": 2 }, { "type": "loss", "content": 0.00748326163738966, "timestamp": "2025-09-10 02:21:18.512742", "step": 2509, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 144 ], "flops": 4271696270016 }, "timestamp": "2025-09-10 02:21:18.549559", "step": 2509, "epoch": 2 }, { "type": "loss", "content": 0.03193259984254837, "timestamp": "2025-09-10 02:21:18.551969", "step": 2510, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:21:18.587099", "step": 2510, "epoch": 2 }, { "type": "loss", "content": 0.01799680106341839, "timestamp": "2025-09-10 02:21:18.593659", "step": 2511, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:21:18.628387", "step": 2511, "epoch": 2 }, { "type": "loss", "content": 0.0041481442749500275, "timestamp": "2025-09-10 02:21:18.661835", "step": 2512, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:21:18.693026", "step": 2512, "epoch": 2 }, { "type": "loss", "content": 0.011895556934177876, "timestamp": "2025-09-10 02:21:18.695018", "step": 2513, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:21:18.725339", "step": 2513, "epoch": 2 }, { "type": "loss", "content": 0.007325722835958004, "timestamp": "2025-09-10 02:21:18.727977", "step": 2514, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:21:18.758656", "step": 2514, "epoch": 2 }, { "type": "loss", "content": 0.011184222996234894, "timestamp": "2025-09-10 02:21:18.770940", "step": 2515, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:21:18.803371", "step": 2515, "epoch": 2 }, { "type": "loss", "content": 0.011311122216284275, "timestamp": "2025-09-10 02:21:18.836820", "step": 2516, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:21:18.867500", "step": 2516, "epoch": 2 }, { "type": "loss", "content": 0.0030376592185348272, "timestamp": "2025-09-10 02:21:18.872524", "step": 2517, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:21:18.903552", "step": 2517, "epoch": 2 }, { "type": "loss", "content": 0.013369477353990078, "timestamp": "2025-09-10 02:21:18.907979", "step": 2518, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:21:18.938715", "step": 2518, "epoch": 2 }, { "type": "loss", "content": 0.0013479441404342651, "timestamp": "2025-09-10 02:21:18.943289", "step": 2519, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:21:18.973618", "step": 2519, "epoch": 2 }, { "type": "loss", "content": 0.019774915650486946, "timestamp": "2025-09-10 02:21:19.001111", "step": 2520, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:21:19.031737", "step": 2520, "epoch": 2 }, { "type": "loss", "content": 0.017613651230931282, "timestamp": "2025-09-10 02:21:19.036334", "step": 2521, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:21:19.067319", "step": 2521, "epoch": 2 }, { "type": "loss", "content": 0.010517450049519539, "timestamp": "2025-09-10 02:21:19.073952", "step": 2522, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:21:19.105667", "step": 2522, "epoch": 2 }, { "type": "loss", "content": 0.010430269874632359, "timestamp": "2025-09-10 02:21:19.115558", "step": 2523, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:21:19.146127", "step": 2523, "epoch": 2 }, { "type": "loss", "content": 0.012062592431902885, "timestamp": "2025-09-10 02:21:19.179159", "step": 2524, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:21:19.212693", "step": 2524, "epoch": 2 }, { "type": "loss", "content": 0.0021377981174737215, "timestamp": "2025-09-10 02:21:19.225778", "step": 2525, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:21:19.259740", "step": 2525, "epoch": 2 }, { "type": "loss", "content": 0.014699029736220837, "timestamp": "2025-09-10 02:21:19.273123", "step": 2526, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:21:19.303637", "step": 2526, "epoch": 2 }, { "type": "loss", "content": 0.00048396483180113137, "timestamp": "2025-09-10 02:21:19.307765", "step": 2527, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:21:19.338356", "step": 2527, "epoch": 2 }, { "type": "loss", "content": 0.010418041609227657, "timestamp": "2025-09-10 02:21:19.363583", "step": 2528, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:21:19.394714", "step": 2528, "epoch": 2 }, { "type": "loss", "content": 0.0037663152907043695, "timestamp": "2025-09-10 02:21:19.405172", "step": 2529, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:21:19.439960", "step": 2529, "epoch": 2 }, { "type": "loss", "content": 0.006023730151355267, "timestamp": "2025-09-10 02:21:19.453673", "step": 2530, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:21:19.484141", "step": 2530, "epoch": 2 }, { "type": "loss", "content": 0.022649195045232773, "timestamp": "2025-09-10 02:21:19.488584", "step": 2531, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:21:19.519990", "step": 2531, "epoch": 2 }, { "type": "loss", "content": 0.008288032375276089, "timestamp": "2025-09-10 02:21:19.548032", "step": 2532, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:21:19.579154", "step": 2532, "epoch": 2 }, { "type": "loss", "content": 0.019115403294563293, "timestamp": "2025-09-10 02:21:19.589494", "step": 2533, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:21:19.622081", "step": 2533, "epoch": 2 }, { "type": "loss", "content": 0.005932506639510393, "timestamp": "2025-09-10 02:21:19.628835", "step": 2534, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 608 ], "flops": 18035204324480 }, "timestamp": "2025-09-10 02:21:19.680355", "step": 2534, "epoch": 2 }, { "type": "loss", "content": 0.013247926719486713, "timestamp": "2025-09-10 02:21:19.701849", "step": 2535, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:21:19.732943", "step": 2535, "epoch": 2 }, { "type": "loss", "content": 0.0022640167735517025, "timestamp": "2025-09-10 02:21:19.766423", "step": 2536, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:21:19.802874", "step": 2536, "epoch": 2 }, { "type": "loss", "content": 0.0012684384128078818, "timestamp": "2025-09-10 02:21:19.805624", "step": 2537, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:21:19.837258", "step": 2537, "epoch": 2 }, { "type": "loss", "content": 0.007246891502290964, "timestamp": "2025-09-10 02:21:19.841698", "step": 2538, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:21:19.876281", "step": 2538, "epoch": 2 }, { "type": "loss", "content": 0.01728993095457554, "timestamp": "2025-09-10 02:21:19.889930", "step": 2539, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-10 02:21:19.929146", "step": 2539, "epoch": 2 }, { "type": "loss", "content": 0.011178716085851192, "timestamp": "2025-09-10 02:21:19.964026", "step": 2540, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:21:20.000513", "step": 2540, "epoch": 2 }, { "type": "loss", "content": 0.008106366731226444, "timestamp": "2025-09-10 02:21:20.007292", "step": 2541, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:21:20.043383", "step": 2541, "epoch": 2 }, { "type": "loss", "content": 0.02785063162446022, "timestamp": "2025-09-10 02:21:20.050102", "step": 2542, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:21:20.084769", "step": 2542, "epoch": 2 }, { "type": "loss", "content": 0.025354115292429924, "timestamp": "2025-09-10 02:21:20.091793", "step": 2543, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:21:20.129268", "step": 2543, "epoch": 2 }, { "type": "loss", "content": 0.0003218255878891796, "timestamp": "2025-09-10 02:21:20.155204", "step": 2544, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:21:20.190690", "step": 2544, "epoch": 2 }, { "type": "loss", "content": 0.016497811302542686, "timestamp": "2025-09-10 02:21:20.197651", "step": 2545, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:21:20.231970", "step": 2545, "epoch": 2 }, { "type": "loss", "content": 0.013020535930991173, "timestamp": "2025-09-10 02:21:20.236272", "step": 2546, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:21:20.269435", "step": 2546, "epoch": 2 }, { "type": "loss", "content": 0.0023302300833165646, "timestamp": "2025-09-10 02:21:20.276990", "step": 2547, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:21:20.308936", "step": 2547, "epoch": 2 }, { "type": "loss", "content": 0.0026004468090832233, "timestamp": "2025-09-10 02:21:20.336788", "step": 2548, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-10 02:21:20.375176", "step": 2548, "epoch": 2 }, { "type": "loss", "content": 0.008805993013083935, "timestamp": "2025-09-10 02:21:20.388388", "step": 2549, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:21:20.432408", "step": 2549, "epoch": 2 }, { "type": "loss", "content": 0.001498592202551663, "timestamp": "2025-09-10 02:21:20.440194", "step": 2550, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:21:20.477923", "step": 2550, "epoch": 2 }, { "type": "loss", "content": 0.0017248743679374456, "timestamp": "2025-09-10 02:21:20.483230", "step": 2551, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:21:20.528188", "step": 2551, "epoch": 2 }, { "type": "loss", "content": 0.006637393496930599, "timestamp": "2025-09-10 02:21:20.553155", "step": 2552, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:21:20.585361", "step": 2552, "epoch": 2 }, { "type": "loss", "content": 0.045442163944244385, "timestamp": "2025-09-10 02:21:20.590283", "step": 2553, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:21:20.622339", "step": 2553, "epoch": 2 }, { "type": "loss", "content": 0.018050571903586388, "timestamp": "2025-09-10 02:21:20.631958", "step": 2554, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:21:20.662783", "step": 2554, "epoch": 2 }, { "type": "loss", "content": 0.0028236510697752237, "timestamp": "2025-09-10 02:21:20.669499", "step": 2555, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:21:20.699836", "step": 2555, "epoch": 2 }, { "type": "loss", "content": 0.024217301979660988, "timestamp": "2025-09-10 02:21:20.727494", "step": 2556, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:21:20.759827", "step": 2556, "epoch": 2 }, { "type": "loss", "content": 0.0008744286606088281, "timestamp": "2025-09-10 02:21:20.768328", "step": 2557, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:21:20.799748", "step": 2557, "epoch": 2 }, { "type": "loss", "content": 0.0017181773437187076, "timestamp": "2025-09-10 02:21:20.806556", "step": 2558, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:21:20.837541", "step": 2558, "epoch": 2 }, { "type": "loss", "content": 0.0028813164681196213, "timestamp": "2025-09-10 02:21:20.844912", "step": 2559, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:21:20.876531", "step": 2559, "epoch": 2 }, { "type": "loss", "content": 0.0020341165363788605, "timestamp": "2025-09-10 02:21:20.904934", "step": 2560, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:21:20.937659", "step": 2560, "epoch": 2 }, { "type": "loss", "content": 0.013223226182162762, "timestamp": "2025-09-10 02:21:20.943076", "step": 2561, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:21:20.972726", "step": 2561, "epoch": 2 }, { "type": "loss", "content": 0.00022325999452732503, "timestamp": "2025-09-10 02:21:20.980421", "step": 2562, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:21:21.010388", "step": 2562, "epoch": 2 }, { "type": "loss", "content": 0.0023189482744783163, "timestamp": "2025-09-10 02:21:21.018006", "step": 2563, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:21:21.048385", "step": 2563, "epoch": 2 }, { "type": "loss", "content": 0.025371316820383072, "timestamp": "2025-09-10 02:21:21.076165", "step": 2564, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:21:21.107692", "step": 2564, "epoch": 2 }, { "type": "loss", "content": 0.001871153013780713, "timestamp": "2025-09-10 02:21:21.117495", "step": 2565, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:21:21.151995", "step": 2565, "epoch": 2 }, { "type": "loss", "content": 0.02054077573120594, "timestamp": "2025-09-10 02:21:21.159853", "step": 2566, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:21:21.191020", "step": 2566, "epoch": 2 }, { "type": "loss", "content": 0.016704251989722252, "timestamp": "2025-09-10 02:21:21.194881", "step": 2567, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:21:21.226260", "step": 2567, "epoch": 2 }, { "type": "loss", "content": 0.010114437900483608, "timestamp": "2025-09-10 02:21:21.254592", "step": 2568, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:21:21.285689", "step": 2568, "epoch": 2 }, { "type": "loss", "content": 0.009702653624117374, "timestamp": "2025-09-10 02:21:21.287904", "step": 2569, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:21:21.317390", "step": 2569, "epoch": 2 }, { "type": "loss", "content": 0.002314184093847871, "timestamp": "2025-09-10 02:21:21.322059", "step": 2570, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:21:21.351928", "step": 2570, "epoch": 2 }, { "type": "loss", "content": 0.0005179584841243923, "timestamp": "2025-09-10 02:21:21.355937", "step": 2571, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:21:21.386239", "step": 2571, "epoch": 2 }, { "type": "loss", "content": 0.00541424797847867, "timestamp": "2025-09-10 02:21:21.414415", "step": 2572, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:21:21.445599", "step": 2572, "epoch": 2 }, { "type": "loss", "content": 0.001489723101258278, "timestamp": "2025-09-10 02:21:21.449809", "step": 2573, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:21:21.481471", "step": 2573, "epoch": 2 }, { "type": "loss", "content": 0.004314497113227844, "timestamp": "2025-09-10 02:21:21.485844", "step": 2574, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:21:21.519739", "step": 2574, "epoch": 2 }, { "type": "loss", "content": 0.00981599185615778, "timestamp": "2025-09-10 02:21:21.533143", "step": 2575, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:21:21.563663", "step": 2575, "epoch": 2 }, { "type": "loss", "content": 0.004870929755270481, "timestamp": "2025-09-10 02:21:21.591404", "step": 2576, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:21:21.622364", "step": 2576, "epoch": 2 }, { "type": "loss", "content": 0.016966963186860085, "timestamp": "2025-09-10 02:21:21.632658", "step": 2577, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:21:21.667842", "step": 2577, "epoch": 2 }, { "type": "loss", "content": 0.00209718756377697, "timestamp": "2025-09-10 02:21:21.681559", "step": 2578, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:21:21.712192", "step": 2578, "epoch": 2 }, { "type": "loss", "content": 0.006372584495693445, "timestamp": "2025-09-10 02:21:21.719248", "step": 2579, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:21:21.750187", "step": 2579, "epoch": 2 }, { "type": "loss", "content": 0.0025949012488126755, "timestamp": "2025-09-10 02:21:21.778358", "step": 2580, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:21:21.808438", "step": 2580, "epoch": 2 }, { "type": "loss", "content": 0.02052624709904194, "timestamp": "2025-09-10 02:21:21.812988", "step": 2581, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 528 ], "flops": 15662185694400 }, "timestamp": "2025-09-10 02:21:21.858702", "step": 2581, "epoch": 2 }, { "type": "loss", "content": 0.00807008147239685, "timestamp": "2025-09-10 02:21:21.877864", "step": 2582, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:21:21.909123", "step": 2582, "epoch": 2 }, { "type": "loss", "content": 0.018544618040323257, "timestamp": "2025-09-10 02:21:21.920247", "step": 2583, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:21:21.951880", "step": 2583, "epoch": 2 }, { "type": "loss", "content": 0.005954326130449772, "timestamp": "2025-09-10 02:21:21.983423", "step": 2584, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:21:22.015401", "step": 2584, "epoch": 2 }, { "type": "loss", "content": 0.010289547964930534, "timestamp": "2025-09-10 02:21:22.017574", "step": 2585, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:21:22.055532", "step": 2585, "epoch": 2 }, { "type": "loss", "content": 0.002948526758700609, "timestamp": "2025-09-10 02:21:22.068927", "step": 2586, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:21:22.101655", "step": 2586, "epoch": 2 }, { "type": "loss", "content": 0.0275122057646513, "timestamp": "2025-09-10 02:21:22.114202", "step": 2587, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-10 02:21:22.152963", "step": 2587, "epoch": 2 }, { "type": "loss", "content": 0.0017236763378605247, "timestamp": "2025-09-10 02:21:22.189514", "step": 2588, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:21:22.220119", "step": 2588, "epoch": 2 }, { "type": "loss", "content": 0.016498176380991936, "timestamp": "2025-09-10 02:21:22.227995", "step": 2589, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:21:22.259287", "step": 2589, "epoch": 2 }, { "type": "loss", "content": 0.016592005267739296, "timestamp": "2025-09-10 02:21:22.271498", "step": 2590, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:21:22.301955", "step": 2590, "epoch": 2 }, { "type": "loss", "content": 0.0010614178609102964, "timestamp": "2025-09-10 02:21:22.314305", "step": 2591, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:21:22.346152", "step": 2591, "epoch": 2 }, { "type": "loss", "content": 0.009229181334376335, "timestamp": "2025-09-10 02:21:22.374266", "step": 2592, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:21:22.404849", "step": 2592, "epoch": 2 }, { "type": "loss", "content": 0.001209449372254312, "timestamp": "2025-09-10 02:21:22.409505", "step": 2593, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:21:22.441174", "step": 2593, "epoch": 2 }, { "type": "loss", "content": 0.010440163314342499, "timestamp": "2025-09-10 02:21:22.448846", "step": 2594, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:21:22.480478", "step": 2594, "epoch": 2 }, { "type": "loss", "content": 0.0017658992437645793, "timestamp": "2025-09-10 02:21:22.487049", "step": 2595, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:21:22.517815", "step": 2595, "epoch": 2 }, { "type": "loss", "content": 0.0006559863686561584, "timestamp": "2025-09-10 02:21:22.546183", "step": 2596, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-10 02:21:22.581657", "step": 2596, "epoch": 2 }, { "type": "loss", "content": 0.015221442095935345, "timestamp": "2025-09-10 02:21:22.596787", "step": 2597, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:21:22.628337", "step": 2597, "epoch": 2 }, { "type": "loss", "content": 0.0013818376464769244, "timestamp": "2025-09-10 02:21:22.636111", "step": 2598, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:21:22.675626", "step": 2598, "epoch": 2 }, { "type": "loss", "content": 0.0057688066735863686, "timestamp": "2025-09-10 02:21:22.682673", "step": 2599, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:21:22.713742", "step": 2599, "epoch": 2 }, { "type": "loss", "content": 0.0062509505078196526, "timestamp": "2025-09-10 02:21:22.738675", "step": 2600, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 144 ], "flops": 4271696270016 }, "timestamp": "2025-09-10 02:21:22.772019", "step": 2600, "epoch": 2 }, { "type": "loss", "content": 0.01174467708915472, "timestamp": "2025-09-10 02:21:22.774363", "step": 2601, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:21:22.806500", "step": 2601, "epoch": 2 }, { "type": "loss", "content": 0.026152905076742172, "timestamp": "2025-09-10 02:21:22.814030", "step": 2602, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:21:22.845107", "step": 2602, "epoch": 2 }, { "type": "loss", "content": 0.0017859925283119082, "timestamp": "2025-09-10 02:21:22.852090", "step": 2603, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:21:22.883885", "step": 2603, "epoch": 2 }, { "type": "loss", "content": 0.002031368436291814, "timestamp": "2025-09-10 02:21:22.912569", "step": 2604, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-10 02:21:22.949939", "step": 2604, "epoch": 2 }, { "type": "loss", "content": 0.004424991551786661, "timestamp": "2025-09-10 02:21:22.965400", "step": 2605, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:21:22.999527", "step": 2605, "epoch": 2 }, { "type": "loss", "content": 0.032836418598890305, "timestamp": "2025-09-10 02:21:23.006892", "step": 2606, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:21:23.042573", "step": 2606, "epoch": 2 }, { "type": "loss", "content": 0.0030306854750961065, "timestamp": "2025-09-10 02:21:23.055995", "step": 2607, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:21:23.087563", "step": 2607, "epoch": 2 }, { "type": "loss", "content": 0.0008659110171720386, "timestamp": "2025-09-10 02:21:23.115941", "step": 2608, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:21:23.148036", "step": 2608, "epoch": 2 }, { "type": "loss", "content": 0.0008872836478985846, "timestamp": "2025-09-10 02:21:23.158399", "step": 2609, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:21:23.199773", "step": 2609, "epoch": 2 }, { "type": "loss", "content": 0.0002644038759171963, "timestamp": "2025-09-10 02:21:23.204388", "step": 2610, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:21:23.242060", "step": 2610, "epoch": 2 }, { "type": "loss", "content": 0.008746746927499771, "timestamp": "2025-09-10 02:21:23.249813", "step": 2611, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:21:23.284421", "step": 2611, "epoch": 2 }, { "type": "loss", "content": 0.0010678042890504003, "timestamp": "2025-09-10 02:21:23.312325", "step": 2612, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:21:23.347381", "step": 2612, "epoch": 2 }, { "type": "loss", "content": 0.006162055768072605, "timestamp": "2025-09-10 02:21:23.352904", "step": 2613, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:21:23.383547", "step": 2613, "epoch": 2 }, { "type": "loss", "content": 0.012832626700401306, "timestamp": "2025-09-10 02:21:23.391370", "step": 2614, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:21:23.423232", "step": 2614, "epoch": 2 }, { "type": "loss", "content": 0.0061977319419384, "timestamp": "2025-09-10 02:21:23.430799", "step": 2615, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:21:23.464416", "step": 2615, "epoch": 2 }, { "type": "loss", "content": 0.0003420994326006621, "timestamp": "2025-09-10 02:21:23.489618", "step": 2616, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:21:23.521959", "step": 2616, "epoch": 2 }, { "type": "loss", "content": 0.004029420204460621, "timestamp": "2025-09-10 02:21:23.528022", "step": 2617, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:21:23.567525", "step": 2617, "epoch": 2 }, { "type": "loss", "content": 0.007904608733952045, "timestamp": "2025-09-10 02:21:23.580897", "step": 2618, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:21:23.617531", "step": 2618, "epoch": 2 }, { "type": "loss", "content": 0.056298431009054184, "timestamp": "2025-09-10 02:21:23.624513", "step": 2619, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:21:23.657783", "step": 2619, "epoch": 2 }, { "type": "loss", "content": 0.04149520769715309, "timestamp": "2025-09-10 02:21:23.682686", "step": 2620, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:21:23.714333", "step": 2620, "epoch": 2 }, { "type": "loss", "content": 0.013901514001190662, "timestamp": "2025-09-10 02:21:23.723015", "step": 2621, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:21:23.754388", "step": 2621, "epoch": 2 }, { "type": "loss", "content": 0.005170899443328381, "timestamp": "2025-09-10 02:21:23.762098", "step": 2622, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:21:23.803781", "step": 2622, "epoch": 2 }, { "type": "loss", "content": 0.011854954063892365, "timestamp": "2025-09-10 02:21:23.817179", "step": 2623, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:21:23.854925", "step": 2623, "epoch": 2 }, { "type": "loss", "content": 0.022033916786313057, "timestamp": "2025-09-10 02:21:23.883562", "step": 2624, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 144 ], "flops": 4271696270016 }, "timestamp": "2025-09-10 02:21:23.915994", "step": 2624, "epoch": 2 }, { "type": "loss", "content": 0.0034682333935052156, "timestamp": "2025-09-10 02:21:23.920954", "step": 2625, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:21:23.959395", "step": 2625, "epoch": 2 }, { "type": "loss", "content": 0.0019165745470672846, "timestamp": "2025-09-10 02:21:23.966272", "step": 2626, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:21:24.001785", "step": 2626, "epoch": 2 }, { "type": "loss", "content": 0.0004640703264158219, "timestamp": "2025-09-10 02:21:24.014321", "step": 2627, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:21:24.045683", "step": 2627, "epoch": 2 }, { "type": "loss", "content": 0.0022274174261838198, "timestamp": "2025-09-10 02:21:24.073515", "step": 2628, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:21:24.106052", "step": 2628, "epoch": 2 }, { "type": "loss", "content": 0.006261548958718777, "timestamp": "2025-09-10 02:21:24.111226", "step": 2629, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:21:24.142328", "step": 2629, "epoch": 2 }, { "type": "loss", "content": 0.0015995798166841269, "timestamp": "2025-09-10 02:21:24.149177", "step": 2630, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:21:24.181390", "step": 2630, "epoch": 2 }, { "type": "loss", "content": 0.002429540967568755, "timestamp": "2025-09-10 02:21:24.188917", "step": 2631, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:21:24.219910", "step": 2631, "epoch": 2 }, { "type": "loss", "content": 0.00019681244157254696, "timestamp": "2025-09-10 02:21:24.247913", "step": 2632, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:21:24.280978", "step": 2632, "epoch": 2 }, { "type": "loss", "content": 0.007192966062575579, "timestamp": "2025-09-10 02:21:24.283097", "step": 2633, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:21:24.319155", "step": 2633, "epoch": 2 }, { "type": "loss", "content": 0.007892182096838951, "timestamp": "2025-09-10 02:21:24.332869", "step": 2634, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:21:24.365336", "step": 2634, "epoch": 2 }, { "type": "loss", "content": 0.0011840269435197115, "timestamp": "2025-09-10 02:21:24.372246", "step": 2635, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:21:24.403970", "step": 2635, "epoch": 2 }, { "type": "loss", "content": 0.0014801176730543375, "timestamp": "2025-09-10 02:21:24.432277", "step": 2636, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:21:24.462772", "step": 2636, "epoch": 2 }, { "type": "loss", "content": 0.005055623594671488, "timestamp": "2025-09-10 02:21:24.468042", "step": 2637, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:21:24.499747", "step": 2637, "epoch": 2 }, { "type": "loss", "content": 0.0026747877709567547, "timestamp": "2025-09-10 02:21:24.511764", "step": 2638, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:21:24.541779", "step": 2638, "epoch": 2 }, { "type": "loss", "content": 0.005939009133726358, "timestamp": "2025-09-10 02:21:24.544499", "step": 2639, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:21:24.575720", "step": 2639, "epoch": 2 }, { "type": "loss", "content": 0.021643701940774918, "timestamp": "2025-09-10 02:21:24.603541", "step": 2640, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:21:24.634852", "step": 2640, "epoch": 2 }, { "type": "loss", "content": 0.01125361304730177, "timestamp": "2025-09-10 02:21:24.639827", "step": 2641, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:21:24.670124", "step": 2641, "epoch": 2 }, { "type": "loss", "content": 0.010432683862745762, "timestamp": "2025-09-10 02:21:24.672783", "step": 2642, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:21:24.703885", "step": 2642, "epoch": 2 }, { "type": "loss", "content": 0.003385532647371292, "timestamp": "2025-09-10 02:21:24.710428", "step": 2643, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:21:24.741330", "step": 2643, "epoch": 2 }, { "type": "loss", "content": 0.0025549919810146093, "timestamp": "2025-09-10 02:21:24.769360", "step": 2644, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:21:24.800848", "step": 2644, "epoch": 2 }, { "type": "loss", "content": 0.0029829232953488827, "timestamp": "2025-09-10 02:21:24.805464", "step": 2645, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:21:24.841850", "step": 2645, "epoch": 2 }, { "type": "loss", "content": 0.0008848052239045501, "timestamp": "2025-09-10 02:21:24.852115", "step": 2646, "epoch": 2 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 736 ], "batch_size": 8, "flops": 14554433988352 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 480 ], "batch_size": 8, "flops": 9492022189824 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 544 ], "batch_size": 8, "flops": 10757625139456 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 624 ], "batch_size": 8, "flops": 12339628826496 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 576 ], "batch_size": 8, "flops": 11390426614272 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 432 ], "batch_size": 8, "flops": 8542819977600 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 432 ], "batch_size": 8, "flops": 8542819977600 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 128 ], "batch_size": 8, "flops": 2531205966848 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 544 ], "batch_size": 8, "flops": 10757625139456 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 656 ], "batch_size": 8, "flops": 12972430301312 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 128 ], "batch_size": 8, "flops": 2531205966848 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 464 ], "batch_size": 8, "flops": 9175621452416 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 560 ], "batch_size": 8, "flops": 11074025876864 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 576 ], "batch_size": 8, "flops": 11390426614272 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 1168 ], "batch_size": 8, "flops": 23097253898368 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 640 ], "batch_size": 8, "flops": 12656029563904 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 6, 208 ], "batch_size": 8, "flops": 4113209653888 } ], "timestamp": "2025-09-10 02:21:35.133425", "step": 2646, "epoch": 2 }, { "type": "pplx", "content": 18574059.11035138, "timestamp": "2025-09-10 02:21:35.136168", "step": 2646, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:21:35.167002", "step": 2646, "epoch": 2 }, { "type": "loss", "content": 0.0029080223757773638, "timestamp": "2025-09-10 02:21:35.172975", "step": 2647, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:21:35.204002", "step": 2647, "epoch": 2 }, { "type": "loss", "content": 0.0013911023270338774, "timestamp": "2025-09-10 02:21:35.236493", "step": 2648, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:21:35.268728", "step": 2648, "epoch": 2 }, { "type": "loss", "content": 0.001914841472171247, "timestamp": "2025-09-10 02:21:35.276255", "step": 2649, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:21:35.309084", "step": 2649, "epoch": 2 }, { "type": "loss", "content": 0.017167022451758385, "timestamp": "2025-09-10 02:21:35.318997", "step": 2650, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:21:35.350982", "step": 2650, "epoch": 2 }, { "type": "loss", "content": 0.003129334654659033, "timestamp": "2025-09-10 02:21:35.357928", "step": 2651, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:21:35.392583", "step": 2651, "epoch": 2 }, { "type": "loss", "content": 0.0010660120751708746, "timestamp": "2025-09-10 02:21:35.427184", "step": 2652, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-10 02:21:35.461172", "step": 2652, "epoch": 2 }, { "type": "loss", "content": 0.0017405982362106442, "timestamp": "2025-09-10 02:21:35.474502", "step": 2653, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:21:35.505765", "step": 2653, "epoch": 2 }, { "type": "loss", "content": 0.0052419803105294704, "timestamp": "2025-09-10 02:21:35.512620", "step": 2654, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:21:35.542954", "step": 2654, "epoch": 2 }, { "type": "loss", "content": 0.008888996206223965, "timestamp": "2025-09-10 02:21:35.547239", "step": 2655, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-10 02:21:35.585764", "step": 2655, "epoch": 2 }, { "type": "loss", "content": 0.0006996404263190925, "timestamp": "2025-09-10 02:21:35.622348", "step": 2656, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:21:35.653762", "step": 2656, "epoch": 2 }, { "type": "loss", "content": 0.013661734759807587, "timestamp": "2025-09-10 02:21:35.655988", "step": 2657, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:21:35.686175", "step": 2657, "epoch": 2 }, { "type": "loss", "content": 0.007183533161878586, "timestamp": "2025-09-10 02:21:35.690706", "step": 2658, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:21:35.723778", "step": 2658, "epoch": 2 }, { "type": "loss", "content": 0.059785980731248856, "timestamp": "2025-09-10 02:21:35.733799", "step": 2659, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:21:35.765142", "step": 2659, "epoch": 2 }, { "type": "loss", "content": 0.0015089567750692368, "timestamp": "2025-09-10 02:21:35.796304", "step": 2660, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:21:35.831263", "step": 2660, "epoch": 2 }, { "type": "loss", "content": 0.0020707848016172647, "timestamp": "2025-09-10 02:21:35.835764", "step": 2661, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:21:35.873478", "step": 2661, "epoch": 2 }, { "type": "loss", "content": 0.0033768482971936464, "timestamp": "2025-09-10 02:21:35.882373", "step": 2662, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:21:35.921734", "step": 2662, "epoch": 2 }, { "type": "loss", "content": 0.00554437842220068, "timestamp": "2025-09-10 02:21:35.931603", "step": 2663, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:21:35.970955", "step": 2663, "epoch": 2 }, { "type": "loss", "content": 0.008375253528356552, "timestamp": "2025-09-10 02:21:36.003311", "step": 2664, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:21:36.049885", "step": 2664, "epoch": 2 }, { "type": "loss", "content": 0.003926243167370558, "timestamp": "2025-09-10 02:21:36.055742", "step": 2665, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:21:36.101046", "step": 2665, "epoch": 2 }, { "type": "loss", "content": 0.0011568154441192746, "timestamp": "2025-09-10 02:21:36.108919", "step": 2666, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:21:36.160206", "step": 2666, "epoch": 2 }, { "type": "loss", "content": 0.0036792331375181675, "timestamp": "2025-09-10 02:21:36.167431", "step": 2667, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:21:36.223847", "step": 2667, "epoch": 2 }, { "type": "loss", "content": 0.0026783072389662266, "timestamp": "2025-09-10 02:21:36.253011", "step": 2668, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-10 02:21:36.303802", "step": 2668, "epoch": 2 }, { "type": "loss", "content": 0.0006417171680368483, "timestamp": "2025-09-10 02:21:36.317108", "step": 2669, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:21:36.378980", "step": 2669, "epoch": 2 }, { "type": "loss", "content": 0.01134135015308857, "timestamp": "2025-09-10 02:21:36.392637", "step": 2670, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:21:36.435220", "step": 2670, "epoch": 2 }, { "type": "loss", "content": 0.004530813079327345, "timestamp": "2025-09-10 02:21:36.446214", "step": 2671, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:21:36.481707", "step": 2671, "epoch": 2 }, { "type": "loss", "content": 0.0019003379857167602, "timestamp": "2025-09-10 02:21:36.506514", "step": 2672, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:21:36.537494", "step": 2672, "epoch": 2 }, { "type": "loss", "content": 0.009907567873597145, "timestamp": "2025-09-10 02:21:36.541757", "step": 2673, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:21:36.575079", "step": 2673, "epoch": 2 }, { "type": "loss", "content": 0.0017410024302080274, "timestamp": "2025-09-10 02:21:36.582406", "step": 2674, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:21:36.613848", "step": 2674, "epoch": 2 }, { "type": "loss", "content": 0.0021735227201133966, "timestamp": "2025-09-10 02:21:36.624645", "step": 2675, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:21:36.656011", "step": 2675, "epoch": 2 }, { "type": "loss", "content": 0.0014428169233724475, "timestamp": "2025-09-10 02:21:36.683637", "step": 2676, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:21:36.718504", "step": 2676, "epoch": 2 }, { "type": "loss", "content": 0.00035420857602730393, "timestamp": "2025-09-10 02:21:36.724320", "step": 2677, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-10 02:21:36.764578", "step": 2677, "epoch": 2 }, { "type": "loss", "content": 0.004614558536559343, "timestamp": "2025-09-10 02:21:36.780152", "step": 2678, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:21:36.818076", "step": 2678, "epoch": 2 }, { "type": "loss", "content": 0.006453771609812975, "timestamp": "2025-09-10 02:21:36.824490", "step": 2679, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:21:36.872335", "step": 2679, "epoch": 2 }, { "type": "loss", "content": 0.034508321434259415, "timestamp": "2025-09-10 02:21:36.903162", "step": 2680, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:21:36.960264", "step": 2680, "epoch": 2 }, { "type": "loss", "content": 0.011013428680598736, "timestamp": "2025-09-10 02:21:36.965747", "step": 2681, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:21:37.008533", "step": 2681, "epoch": 2 }, { "type": "loss", "content": 0.015115066431462765, "timestamp": "2025-09-10 02:21:37.015238", "step": 2682, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:21:37.055473", "step": 2682, "epoch": 2 }, { "type": "loss", "content": 0.00047675202949903905, "timestamp": "2025-09-10 02:21:37.060861", "step": 2683, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:21:37.100431", "step": 2683, "epoch": 2 }, { "type": "loss", "content": 0.03187503293156624, "timestamp": "2025-09-10 02:21:37.127892", "step": 2684, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:21:37.174265", "step": 2684, "epoch": 2 }, { "type": "loss", "content": 0.0006218705675564706, "timestamp": "2025-09-10 02:21:37.182305", "step": 2685, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:21:37.213798", "step": 2685, "epoch": 2 }, { "type": "loss", "content": 0.0013111613225191832, "timestamp": "2025-09-10 02:21:37.218321", "step": 2686, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:21:37.248961", "step": 2686, "epoch": 2 }, { "type": "loss", "content": 0.007048290688544512, "timestamp": "2025-09-10 02:21:37.255708", "step": 2687, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:21:37.287452", "step": 2687, "epoch": 2 }, { "type": "loss", "content": 0.0017233153339475393, "timestamp": "2025-09-10 02:21:37.315254", "step": 2688, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:21:37.348112", "step": 2688, "epoch": 2 }, { "type": "loss", "content": 0.0018436602549627423, "timestamp": "2025-09-10 02:21:37.355939", "step": 2689, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:21:37.394439", "step": 2689, "epoch": 2 }, { "type": "loss", "content": 0.01839214749634266, "timestamp": "2025-09-10 02:21:37.402313", "step": 2690, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:21:37.437257", "step": 2690, "epoch": 2 }, { "type": "loss", "content": 0.006622764747589827, "timestamp": "2025-09-10 02:21:37.444049", "step": 2691, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:21:37.479721", "step": 2691, "epoch": 2 }, { "type": "loss", "content": 0.007068789564073086, "timestamp": "2025-09-10 02:21:37.507432", "step": 2692, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:21:37.544360", "step": 2692, "epoch": 2 }, { "type": "loss", "content": 0.0019901886116713285, "timestamp": "2025-09-10 02:21:37.548756", "step": 2693, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:21:37.586064", "step": 2693, "epoch": 2 }, { "type": "loss", "content": 0.0049681165255606174, "timestamp": "2025-09-10 02:21:37.590610", "step": 2694, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:21:37.621080", "step": 2694, "epoch": 2 }, { "type": "loss", "content": 0.0012007238110527396, "timestamp": "2025-09-10 02:21:37.625231", "step": 2695, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:21:37.655813", "step": 2695, "epoch": 2 }, { "type": "loss", "content": 0.008289673365652561, "timestamp": "2025-09-10 02:21:37.681152", "step": 2696, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:21:37.712672", "step": 2696, "epoch": 2 }, { "type": "loss", "content": 0.0011843375395983458, "timestamp": "2025-09-10 02:21:37.714804", "step": 2697, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:21:37.746400", "step": 2697, "epoch": 2 }, { "type": "loss", "content": 0.0065173497423529625, "timestamp": "2025-09-10 02:21:37.756744", "step": 2698, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:21:37.787428", "step": 2698, "epoch": 2 }, { "type": "loss", "content": 0.00036999728763476014, "timestamp": "2025-09-10 02:21:37.790044", "step": 2699, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:21:37.827483", "step": 2699, "epoch": 2 }, { "type": "loss", "content": 0.01047492679208517, "timestamp": "2025-09-10 02:21:37.855289", "step": 2700, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:21:37.908537", "step": 2700, "epoch": 2 }, { "type": "loss", "content": 0.0033648067619651556, "timestamp": "2025-09-10 02:21:37.923693", "step": 2701, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:21:38.005482", "step": 2701, "epoch": 2 }, { "type": "loss", "content": 0.001408770913258195, "timestamp": "2025-09-10 02:21:38.022992", "step": 2702, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:21:38.106573", "step": 2702, "epoch": 2 }, { "type": "loss", "content": 0.011482964269816875, "timestamp": "2025-09-10 02:21:38.117121", "step": 2703, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:21:38.165908", "step": 2703, "epoch": 2 }, { "type": "loss", "content": 0.0008962144493125379, "timestamp": "2025-09-10 02:21:38.204532", "step": 2704, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:21:38.289760", "step": 2704, "epoch": 2 }, { "type": "loss", "content": 0.0022653231862932444, "timestamp": "2025-09-10 02:21:38.294943", "step": 2705, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:21:38.365940", "step": 2705, "epoch": 2 }, { "type": "loss", "content": 0.013882993720471859, "timestamp": "2025-09-10 02:21:38.383012", "step": 2706, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:21:38.457044", "step": 2706, "epoch": 2 }, { "type": "loss", "content": 0.001253266236744821, "timestamp": "2025-09-10 02:21:38.464033", "step": 2707, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:21:38.543788", "step": 2707, "epoch": 2 }, { "type": "loss", "content": 0.0013703681761398911, "timestamp": "2025-09-10 02:21:38.575094", "step": 2708, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:21:38.622236", "step": 2708, "epoch": 2 }, { "type": "loss", "content": 0.0033048451878130436, "timestamp": "2025-09-10 02:21:38.630211", "step": 2709, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:21:38.675604", "step": 2709, "epoch": 2 }, { "type": "loss", "content": 0.0006388475303538144, "timestamp": "2025-09-10 02:21:38.682726", "step": 2710, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:21:38.721884", "step": 2710, "epoch": 2 }, { "type": "loss", "content": 0.018123749643564224, "timestamp": "2025-09-10 02:21:38.729692", "step": 2711, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:21:38.783696", "step": 2711, "epoch": 2 }, { "type": "loss", "content": 0.003044202458113432, "timestamp": "2025-09-10 02:21:38.816789", "step": 2712, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:21:38.862713", "step": 2712, "epoch": 2 }, { "type": "loss", "content": 0.0013637479860335588, "timestamp": "2025-09-10 02:21:38.875394", "step": 2713, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:21:38.929319", "step": 2713, "epoch": 2 }, { "type": "loss", "content": 0.0018133390694856644, "timestamp": "2025-09-10 02:21:38.940358", "step": 2714, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 656 ], "flops": 19459015502528 }, "timestamp": "2025-09-10 02:21:39.009625", "step": 2714, "epoch": 2 }, { "type": "loss", "content": 0.01170498225837946, "timestamp": "2025-09-10 02:21:39.033076", "step": 2715, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:21:39.075710", "step": 2715, "epoch": 2 }, { "type": "loss", "content": 0.0006123408675193787, "timestamp": "2025-09-10 02:21:39.103674", "step": 2716, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:21:39.153884", "step": 2716, "epoch": 2 }, { "type": "loss", "content": 0.0004910778952762485, "timestamp": "2025-09-10 02:21:39.159177", "step": 2717, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:21:39.204546", "step": 2717, "epoch": 2 }, { "type": "loss", "content": 0.0005400904337875545, "timestamp": "2025-09-10 02:21:39.212357", "step": 2718, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:21:39.265552", "step": 2718, "epoch": 2 }, { "type": "loss", "content": 0.02437109872698784, "timestamp": "2025-09-10 02:21:39.272686", "step": 2719, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:21:39.311229", "step": 2719, "epoch": 2 }, { "type": "loss", "content": 0.003212881973013282, "timestamp": "2025-09-10 02:21:39.339010", "step": 2720, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:21:39.389143", "step": 2720, "epoch": 2 }, { "type": "loss", "content": 0.004050101153552532, "timestamp": "2025-09-10 02:21:39.399713", "step": 2721, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:21:39.441377", "step": 2721, "epoch": 2 }, { "type": "loss", "content": 0.0005341669311746955, "timestamp": "2025-09-10 02:21:39.448999", "step": 2722, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:21:39.488414", "step": 2722, "epoch": 2 }, { "type": "loss", "content": 0.0020567751489579678, "timestamp": "2025-09-10 02:21:39.496393", "step": 2723, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:21:39.534121", "step": 2723, "epoch": 2 }, { "type": "loss", "content": 0.0017389410641044378, "timestamp": "2025-09-10 02:21:39.562692", "step": 2724, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:21:39.640851", "step": 2724, "epoch": 2 }, { "type": "loss", "content": 0.047998156398534775, "timestamp": "2025-09-10 02:21:39.658482", "step": 2725, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:21:39.726652", "step": 2725, "epoch": 2 }, { "type": "loss", "content": 0.0006225144607014954, "timestamp": "2025-09-10 02:21:39.743044", "step": 2726, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:21:39.810940", "step": 2726, "epoch": 2 }, { "type": "loss", "content": 0.0033620852045714855, "timestamp": "2025-09-10 02:21:39.827193", "step": 2727, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:21:39.890724", "step": 2727, "epoch": 2 }, { "type": "loss", "content": 0.0009153550490736961, "timestamp": "2025-09-10 02:21:39.915738", "step": 2728, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:21:39.965315", "step": 2728, "epoch": 2 }, { "type": "loss", "content": 0.0016450297553092241, "timestamp": "2025-09-10 02:21:39.973894", "step": 2729, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:21:40.010673", "step": 2729, "epoch": 2 }, { "type": "loss", "content": 0.00031730628688819706, "timestamp": "2025-09-10 02:21:40.017831", "step": 2730, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:21:40.051540", "step": 2730, "epoch": 2 }, { "type": "loss", "content": 0.05042002350091934, "timestamp": "2025-09-10 02:21:40.059029", "step": 2731, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:21:40.097441", "step": 2731, "epoch": 2 }, { "type": "loss", "content": 0.0009090257226489484, "timestamp": "2025-09-10 02:21:40.125225", "step": 2732, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:21:40.168073", "step": 2732, "epoch": 2 }, { "type": "loss", "content": 0.011402477510273457, "timestamp": "2025-09-10 02:21:40.178050", "step": 2733, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 512 ], "flops": 15187581968384 }, "timestamp": "2025-09-10 02:21:40.236822", "step": 2733, "epoch": 2 }, { "type": "loss", "content": 0.006502915173768997, "timestamp": "2025-09-10 02:21:40.254504", "step": 2734, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:21:40.294820", "step": 2734, "epoch": 2 }, { "type": "loss", "content": 0.04736243933439255, "timestamp": "2025-09-10 02:21:40.301645", "step": 2735, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:21:40.339970", "step": 2735, "epoch": 2 }, { "type": "loss", "content": 0.0024216361343860626, "timestamp": "2025-09-10 02:21:40.367094", "step": 2736, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:21:40.400543", "step": 2736, "epoch": 2 }, { "type": "loss", "content": 0.004260449670255184, "timestamp": "2025-09-10 02:21:40.404660", "step": 2737, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:21:40.436564", "step": 2737, "epoch": 2 }, { "type": "loss", "content": 0.0016414711717516184, "timestamp": "2025-09-10 02:21:40.442927", "step": 2738, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:21:40.473988", "step": 2738, "epoch": 2 }, { "type": "loss", "content": 0.0007454793085344136, "timestamp": "2025-09-10 02:21:40.480752", "step": 2739, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:21:40.511868", "step": 2739, "epoch": 2 }, { "type": "loss", "content": 0.025238368660211563, "timestamp": "2025-09-10 02:21:40.539369", "step": 2740, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:21:40.573420", "step": 2740, "epoch": 2 }, { "type": "loss", "content": 0.0014565506717190146, "timestamp": "2025-09-10 02:21:40.581040", "step": 2741, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:21:40.614852", "step": 2741, "epoch": 2 }, { "type": "loss", "content": 0.0028659238014370203, "timestamp": "2025-09-10 02:21:40.628243", "step": 2742, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:21:40.659481", "step": 2742, "epoch": 2 }, { "type": "loss", "content": 0.005315977614372969, "timestamp": "2025-09-10 02:21:40.666068", "step": 2743, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:21:40.697352", "step": 2743, "epoch": 2 }, { "type": "loss", "content": 0.0011978724505752325, "timestamp": "2025-09-10 02:21:40.725029", "step": 2744, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:21:40.757583", "step": 2744, "epoch": 2 }, { "type": "loss", "content": 0.0008306491072289646, "timestamp": "2025-09-10 02:21:40.770565", "step": 2745, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:21:40.801906", "step": 2745, "epoch": 2 }, { "type": "loss", "content": 0.002079846104606986, "timestamp": "2025-09-10 02:21:40.808974", "step": 2746, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:21:40.840990", "step": 2746, "epoch": 2 }, { "type": "loss", "content": 0.013913876377046108, "timestamp": "2025-09-10 02:21:40.845922", "step": 2747, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:21:40.884272", "step": 2747, "epoch": 2 }, { "type": "loss", "content": 0.0014017752837389708, "timestamp": "2025-09-10 02:21:40.915824", "step": 2748, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:21:40.952577", "step": 2748, "epoch": 2 }, { "type": "loss", "content": 0.018380844965577126, "timestamp": "2025-09-10 02:21:40.957550", "step": 2749, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:21:40.991539", "step": 2749, "epoch": 2 }, { "type": "loss", "content": 0.04001007229089737, "timestamp": "2025-09-10 02:21:41.001708", "step": 2750, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:21:41.039352", "step": 2750, "epoch": 2 }, { "type": "loss", "content": 0.0020031663589179516, "timestamp": "2025-09-10 02:21:41.049458", "step": 2751, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:21:41.089957", "step": 2751, "epoch": 2 }, { "type": "loss", "content": 0.0007604836719110608, "timestamp": "2025-09-10 02:21:41.118330", "step": 2752, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:21:41.155487", "step": 2752, "epoch": 2 }, { "type": "loss", "content": 0.0003465786576271057, "timestamp": "2025-09-10 02:21:41.165491", "step": 2753, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:21:41.202972", "step": 2753, "epoch": 2 }, { "type": "loss", "content": 0.0002132646186510101, "timestamp": "2025-09-10 02:21:41.209606", "step": 2754, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:21:41.248213", "step": 2754, "epoch": 2 }, { "type": "loss", "content": 0.0009989741956815124, "timestamp": "2025-09-10 02:21:41.255377", "step": 2755, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:21:41.297555", "step": 2755, "epoch": 2 }, { "type": "loss", "content": 0.006549767684191465, "timestamp": "2025-09-10 02:21:41.325684", "step": 2756, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:21:41.364478", "step": 2756, "epoch": 2 }, { "type": "loss", "content": 0.001987830735743046, "timestamp": "2025-09-10 02:21:41.366592", "step": 2757, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:21:41.402406", "step": 2757, "epoch": 2 }, { "type": "loss", "content": 0.006204267032444477, "timestamp": "2025-09-10 02:21:41.408956", "step": 2758, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:21:41.446821", "step": 2758, "epoch": 2 }, { "type": "loss", "content": 0.01345762424170971, "timestamp": "2025-09-10 02:21:41.456689", "step": 2759, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:21:41.491228", "step": 2759, "epoch": 2 }, { "type": "loss", "content": 0.002160031348466873, "timestamp": "2025-09-10 02:21:41.524617", "step": 2760, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:21:41.555466", "step": 2760, "epoch": 2 }, { "type": "loss", "content": 0.0004899486084468663, "timestamp": "2025-09-10 02:21:41.557921", "step": 2761, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:21:41.588416", "step": 2761, "epoch": 2 }, { "type": "loss", "content": 0.002803497016429901, "timestamp": "2025-09-10 02:21:41.595467", "step": 2762, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:21:41.625790", "step": 2762, "epoch": 2 }, { "type": "loss", "content": 0.0015402629505842924, "timestamp": "2025-09-10 02:21:41.632695", "step": 2763, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:21:41.663176", "step": 2763, "epoch": 2 }, { "type": "loss", "content": 0.005135064013302326, "timestamp": "2025-09-10 02:21:41.691459", "step": 2764, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:21:41.722179", "step": 2764, "epoch": 2 }, { "type": "loss", "content": 0.0008388920687139034, "timestamp": "2025-09-10 02:21:41.731503", "step": 2765, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:21:41.763229", "step": 2765, "epoch": 2 }, { "type": "loss", "content": 0.001156167476437986, "timestamp": "2025-09-10 02:21:41.775466", "step": 2766, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:21:41.808417", "step": 2766, "epoch": 2 }, { "type": "loss", "content": 0.003134796628728509, "timestamp": "2025-09-10 02:21:41.815705", "step": 2767, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:21:41.847048", "step": 2767, "epoch": 2 }, { "type": "loss", "content": 0.03722445294260979, "timestamp": "2025-09-10 02:21:41.874747", "step": 2768, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:21:41.906579", "step": 2768, "epoch": 2 }, { "type": "loss", "content": 0.0030981996096670628, "timestamp": "2025-09-10 02:21:41.910934", "step": 2769, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:21:41.941649", "step": 2769, "epoch": 2 }, { "type": "loss", "content": 0.026767630130052567, "timestamp": "2025-09-10 02:21:41.945986", "step": 2770, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:21:41.977046", "step": 2770, "epoch": 2 }, { "type": "loss", "content": 0.004015625920146704, "timestamp": "2025-09-10 02:21:41.983688", "step": 2771, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:21:42.014899", "step": 2771, "epoch": 2 }, { "type": "loss", "content": 0.01961735263466835, "timestamp": "2025-09-10 02:21:42.042822", "step": 2772, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:21:42.074168", "step": 2772, "epoch": 2 }, { "type": "loss", "content": 0.004195111338049173, "timestamp": "2025-09-10 02:21:42.081567", "step": 2773, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:21:42.113471", "step": 2773, "epoch": 2 }, { "type": "loss", "content": 0.00503236660733819, "timestamp": "2025-09-10 02:21:42.123489", "step": 2774, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:21:42.155417", "step": 2774, "epoch": 2 }, { "type": "loss", "content": 0.00502787483856082, "timestamp": "2025-09-10 02:21:42.162981", "step": 2775, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:21:42.194347", "step": 2775, "epoch": 2 }, { "type": "loss", "content": 0.024155091494321823, "timestamp": "2025-09-10 02:21:42.225406", "step": 2776, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:21:42.256407", "step": 2776, "epoch": 2 }, { "type": "loss", "content": 0.02515154518187046, "timestamp": "2025-09-10 02:21:42.264155", "step": 2777, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:21:42.295541", "step": 2777, "epoch": 2 }, { "type": "loss", "content": 0.018512414768338203, "timestamp": "2025-09-10 02:21:42.306224", "step": 2778, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:21:42.338151", "step": 2778, "epoch": 2 }, { "type": "loss", "content": 0.006325080059468746, "timestamp": "2025-09-10 02:21:42.349127", "step": 2779, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:21:42.385992", "step": 2779, "epoch": 2 }, { "type": "loss", "content": 0.0009645558893680573, "timestamp": "2025-09-10 02:21:42.417123", "step": 2780, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:21:42.447826", "step": 2780, "epoch": 2 }, { "type": "loss", "content": 0.004604689311236143, "timestamp": "2025-09-10 02:21:42.453228", "step": 2781, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:21:42.483632", "step": 2781, "epoch": 2 }, { "type": "loss", "content": 0.004039444029331207, "timestamp": "2025-09-10 02:21:42.490568", "step": 2782, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:21:42.520587", "step": 2782, "epoch": 2 }, { "type": "loss", "content": 0.0015288168797269464, "timestamp": "2025-09-10 02:21:42.528202", "step": 2783, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:21:42.559405", "step": 2783, "epoch": 2 }, { "type": "loss", "content": 0.004764964338392019, "timestamp": "2025-09-10 02:21:42.584432", "step": 2784, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:21:42.615196", "step": 2784, "epoch": 2 }, { "type": "loss", "content": 0.001542671350762248, "timestamp": "2025-09-10 02:21:42.623018", "step": 2785, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:21:42.654012", "step": 2785, "epoch": 2 }, { "type": "loss", "content": 0.000828076503239572, "timestamp": "2025-09-10 02:21:42.665094", "step": 2786, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:21:42.696485", "step": 2786, "epoch": 2 }, { "type": "loss", "content": 0.0006047695060260594, "timestamp": "2025-09-10 02:21:42.706673", "step": 2787, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:21:42.737739", "step": 2787, "epoch": 2 }, { "type": "loss", "content": 0.002101297490298748, "timestamp": "2025-09-10 02:21:42.770979", "step": 2788, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 128 ], "flops": 3797092544000 }, "timestamp": "2025-09-10 02:21:42.803580", "step": 2788, "epoch": 2 }, { "type": "loss", "content": 0.0025870150420814753, "timestamp": "2025-09-10 02:21:42.806965", "step": 2789, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:21:42.841421", "step": 2789, "epoch": 2 }, { "type": "loss", "content": 0.028584027662873268, "timestamp": "2025-09-10 02:21:42.848544", "step": 2790, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:21:42.887114", "step": 2790, "epoch": 2 }, { "type": "loss", "content": 0.035170648247003555, "timestamp": "2025-09-10 02:21:42.893869", "step": 2791, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:21:42.929384", "step": 2791, "epoch": 2 }, { "type": "loss", "content": 0.0014619800494983792, "timestamp": "2025-09-10 02:21:42.957717", "step": 2792, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:21:42.995938", "step": 2792, "epoch": 2 }, { "type": "loss", "content": 0.01359619665890932, "timestamp": "2025-09-10 02:21:43.001446", "step": 2793, "epoch": 2 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 736 ], "batch_size": 8, "flops": 14554433988352 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 480 ], "batch_size": 8, "flops": 9492022189824 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 544 ], "batch_size": 8, "flops": 10757625139456 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 624 ], "batch_size": 8, "flops": 12339628826496 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 576 ], "batch_size": 8, "flops": 11390426614272 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 432 ], "batch_size": 8, "flops": 8542819977600 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 432 ], "batch_size": 8, "flops": 8542819977600 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 128 ], "batch_size": 8, "flops": 2531205966848 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 544 ], "batch_size": 8, "flops": 10757625139456 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 656 ], "batch_size": 8, "flops": 12972430301312 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 128 ], "batch_size": 8, "flops": 2531205966848 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 464 ], "batch_size": 8, "flops": 9175621452416 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 560 ], "batch_size": 8, "flops": 11074025876864 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 576 ], "batch_size": 8, "flops": 11390426614272 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 1168 ], "batch_size": 8, "flops": 23097253898368 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 640 ], "batch_size": 8, "flops": 12656029563904 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 6, 208 ], "batch_size": 8, "flops": 4113209653888 } ], "timestamp": "2025-09-10 02:21:53.465801", "step": 2793, "epoch": 2 }, { "type": "pplx", "content": 19197019.4612857, "timestamp": "2025-09-10 02:21:53.469410", "step": 2793, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:21:53.499938", "step": 2793, "epoch": 2 }, { "type": "loss", "content": 0.000690083543304354, "timestamp": "2025-09-10 02:21:53.510007", "step": 2794, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:21:53.544004", "step": 2794, "epoch": 2 }, { "type": "loss", "content": 0.056752197444438934, "timestamp": "2025-09-10 02:21:53.557315", "step": 2795, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:21:53.588933", "step": 2795, "epoch": 2 }, { "type": "loss", "content": 0.0039436123333871365, "timestamp": "2025-09-10 02:21:53.616773", "step": 2796, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:21:53.647612", "step": 2796, "epoch": 2 }, { "type": "loss", "content": 0.003669754136353731, "timestamp": "2025-09-10 02:21:53.653063", "step": 2797, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:21:53.684749", "step": 2797, "epoch": 2 }, { "type": "loss", "content": 0.0009105192148126662, "timestamp": "2025-09-10 02:21:53.692063", "step": 2798, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:21:53.723340", "step": 2798, "epoch": 2 }, { "type": "loss", "content": 0.012229084968566895, "timestamp": "2025-09-10 02:21:53.727670", "step": 2799, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:21:53.757906", "step": 2799, "epoch": 2 }, { "type": "loss", "content": 0.0010326796909794211, "timestamp": "2025-09-10 02:21:53.783340", "step": 2800, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:21:53.814232", "step": 2800, "epoch": 2 }, { "type": "loss", "content": 0.015513862483203411, "timestamp": "2025-09-10 02:21:53.816480", "step": 2801, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-10 02:21:53.851680", "step": 2801, "epoch": 2 }, { "type": "loss", "content": 0.022338945418596268, "timestamp": "2025-09-10 02:21:53.865695", "step": 2802, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:21:53.898345", "step": 2802, "epoch": 2 }, { "type": "loss", "content": 0.012829114682972431, "timestamp": "2025-09-10 02:21:53.905512", "step": 2803, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:21:53.935544", "step": 2803, "epoch": 2 }, { "type": "loss", "content": 0.004405899439007044, "timestamp": "2025-09-10 02:21:53.961305", "step": 2804, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:21:54.000565", "step": 2804, "epoch": 2 }, { "type": "loss", "content": 0.0013872667914256454, "timestamp": "2025-09-10 02:21:54.011093", "step": 2805, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:21:54.046640", "step": 2805, "epoch": 2 }, { "type": "loss", "content": 0.005773500073701143, "timestamp": "2025-09-10 02:21:54.057600", "step": 2806, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:21:54.092646", "step": 2806, "epoch": 2 }, { "type": "loss", "content": 0.010280570015311241, "timestamp": "2025-09-10 02:21:54.099713", "step": 2807, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:21:54.134369", "step": 2807, "epoch": 2 }, { "type": "loss", "content": 0.03476468473672867, "timestamp": "2025-09-10 02:21:54.162308", "step": 2808, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:21:54.196654", "step": 2808, "epoch": 2 }, { "type": "loss", "content": 0.0031658527441322803, "timestamp": "2025-09-10 02:21:54.198954", "step": 2809, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:21:54.232450", "step": 2809, "epoch": 2 }, { "type": "loss", "content": 0.006831489037722349, "timestamp": "2025-09-10 02:21:54.237018", "step": 2810, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:21:54.278093", "step": 2810, "epoch": 2 }, { "type": "loss", "content": 0.0033258756157010794, "timestamp": "2025-09-10 02:21:54.290608", "step": 2811, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:21:54.328729", "step": 2811, "epoch": 2 }, { "type": "loss", "content": 0.024213241413235664, "timestamp": "2025-09-10 02:21:54.359819", "step": 2812, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:21:54.393296", "step": 2812, "epoch": 2 }, { "type": "loss", "content": 0.003739068517461419, "timestamp": "2025-09-10 02:21:54.398570", "step": 2813, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:21:54.449409", "step": 2813, "epoch": 2 }, { "type": "loss", "content": 0.0015904037281870842, "timestamp": "2025-09-10 02:21:54.457159", "step": 2814, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:21:54.494234", "step": 2814, "epoch": 2 }, { "type": "loss", "content": 0.0012923607137054205, "timestamp": "2025-09-10 02:21:54.498995", "step": 2815, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:21:54.535980", "step": 2815, "epoch": 2 }, { "type": "loss", "content": 0.0011023187544196844, "timestamp": "2025-09-10 02:21:54.564641", "step": 2816, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:21:54.599843", "step": 2816, "epoch": 2 }, { "type": "loss", "content": 0.005850085057318211, "timestamp": "2025-09-10 02:21:54.604642", "step": 2817, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:21:54.642705", "step": 2817, "epoch": 2 }, { "type": "loss", "content": 0.006922147236764431, "timestamp": "2025-09-10 02:21:54.647104", "step": 2818, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:21:54.682379", "step": 2818, "epoch": 2 }, { "type": "loss", "content": 0.006881711073219776, "timestamp": "2025-09-10 02:21:54.688316", "step": 2819, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:21:54.724767", "step": 2819, "epoch": 2 }, { "type": "loss", "content": 0.002905226079747081, "timestamp": "2025-09-10 02:21:54.759067", "step": 2820, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:21:54.796273", "step": 2820, "epoch": 2 }, { "type": "loss", "content": 0.042592164129018784, "timestamp": "2025-09-10 02:21:54.804964", "step": 2821, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:21:54.847161", "step": 2821, "epoch": 2 }, { "type": "loss", "content": 0.0068238540552556515, "timestamp": "2025-09-10 02:21:54.853508", "step": 2822, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:21:54.891239", "step": 2822, "epoch": 2 }, { "type": "loss", "content": 0.02196827158331871, "timestamp": "2025-09-10 02:21:54.898080", "step": 2823, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:21:54.934244", "step": 2823, "epoch": 2 }, { "type": "loss", "content": 0.007932315580546856, "timestamp": "2025-09-10 02:21:54.962639", "step": 2824, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:21:54.994981", "step": 2824, "epoch": 2 }, { "type": "loss", "content": 0.009902574121952057, "timestamp": "2025-09-10 02:21:55.003516", "step": 2825, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:21:55.035712", "step": 2825, "epoch": 2 }, { "type": "loss", "content": 0.0023097884841263294, "timestamp": "2025-09-10 02:21:55.046003", "step": 2826, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:21:55.076515", "step": 2826, "epoch": 2 }, { "type": "loss", "content": 0.010782705619931221, "timestamp": "2025-09-10 02:21:55.083561", "step": 2827, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:21:55.113817", "step": 2827, "epoch": 2 }, { "type": "loss", "content": 0.004876940976828337, "timestamp": "2025-09-10 02:21:55.145150", "step": 2828, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 144 ], "flops": 4271696270016 }, "timestamp": "2025-09-10 02:21:55.174739", "step": 2828, "epoch": 2 }, { "type": "loss", "content": 0.0016192414332181215, "timestamp": "2025-09-10 02:21:55.176954", "step": 2829, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:21:55.207595", "step": 2829, "epoch": 2 }, { "type": "loss", "content": 0.013937892392277718, "timestamp": "2025-09-10 02:21:55.214827", "step": 2830, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:21:55.245059", "step": 2830, "epoch": 2 }, { "type": "loss", "content": 0.003963314928114414, "timestamp": "2025-09-10 02:21:55.252530", "step": 2831, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:21:55.283769", "step": 2831, "epoch": 2 }, { "type": "loss", "content": 0.018773654475808144, "timestamp": "2025-09-10 02:21:55.308637", "step": 2832, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:21:55.340079", "step": 2832, "epoch": 2 }, { "type": "loss", "content": 0.017363855615258217, "timestamp": "2025-09-10 02:21:55.344371", "step": 2833, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:21:55.374762", "step": 2833, "epoch": 2 }, { "type": "loss", "content": 0.003922105301171541, "timestamp": "2025-09-10 02:21:55.381507", "step": 2834, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:21:55.412237", "step": 2834, "epoch": 2 }, { "type": "loss", "content": 0.01441988069564104, "timestamp": "2025-09-10 02:21:55.419228", "step": 2835, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:21:55.456602", "step": 2835, "epoch": 2 }, { "type": "loss", "content": 0.016393983736634254, "timestamp": "2025-09-10 02:21:55.487947", "step": 2836, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:21:55.520112", "step": 2836, "epoch": 2 }, { "type": "loss", "content": 0.0013055962044745684, "timestamp": "2025-09-10 02:21:55.522406", "step": 2837, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:21:55.553310", "step": 2837, "epoch": 2 }, { "type": "loss", "content": 0.002322630723938346, "timestamp": "2025-09-10 02:21:55.560592", "step": 2838, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:21:55.591278", "step": 2838, "epoch": 2 }, { "type": "loss", "content": 0.0012900998117402196, "timestamp": "2025-09-10 02:21:55.599034", "step": 2839, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:21:55.631620", "step": 2839, "epoch": 2 }, { "type": "loss", "content": 0.004327766597270966, "timestamp": "2025-09-10 02:21:55.659512", "step": 2840, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:21:55.692246", "step": 2840, "epoch": 2 }, { "type": "loss", "content": 0.0029336088337004185, "timestamp": "2025-09-10 02:21:55.699962", "step": 2841, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:21:55.731399", "step": 2841, "epoch": 2 }, { "type": "loss", "content": 0.008453912101686, "timestamp": "2025-09-10 02:21:55.735812", "step": 2842, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-10 02:21:55.774425", "step": 2842, "epoch": 2 }, { "type": "loss", "content": 0.00400108378380537, "timestamp": "2025-09-10 02:21:55.790113", "step": 2843, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:21:55.819970", "step": 2843, "epoch": 2 }, { "type": "loss", "content": 0.001131609664298594, "timestamp": "2025-09-10 02:21:55.847859", "step": 2844, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:21:55.883692", "step": 2844, "epoch": 2 }, { "type": "loss", "content": 0.04177376627922058, "timestamp": "2025-09-10 02:21:55.887997", "step": 2845, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:21:55.926164", "step": 2845, "epoch": 2 }, { "type": "loss", "content": 0.013375887647271156, "timestamp": "2025-09-10 02:21:55.934060", "step": 2846, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 432 ], "flops": 12814563338304 }, "timestamp": "2025-09-10 02:21:55.979412", "step": 2846, "epoch": 2 }, { "type": "loss", "content": 0.034866467118263245, "timestamp": "2025-09-10 02:21:55.995621", "step": 2847, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:21:56.040186", "step": 2847, "epoch": 2 }, { "type": "loss", "content": 0.008052507415413857, "timestamp": "2025-09-10 02:21:56.065565", "step": 2848, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:21:56.106829", "step": 2848, "epoch": 2 }, { "type": "loss", "content": 0.0027601835317909718, "timestamp": "2025-09-10 02:21:56.119851", "step": 2849, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:21:56.159735", "step": 2849, "epoch": 2 }, { "type": "loss", "content": 0.0046163699589669704, "timestamp": "2025-09-10 02:21:56.167667", "step": 2850, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:21:56.204477", "step": 2850, "epoch": 2 }, { "type": "loss", "content": 0.0024019996635615826, "timestamp": "2025-09-10 02:21:56.208674", "step": 2851, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:21:56.250504", "step": 2851, "epoch": 2 }, { "type": "loss", "content": 0.00619547301903367, "timestamp": "2025-09-10 02:21:56.278254", "step": 2852, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:21:56.309613", "step": 2852, "epoch": 2 }, { "type": "loss", "content": 0.011628863401710987, "timestamp": "2025-09-10 02:21:56.311783", "step": 2853, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 144 ], "flops": 4271696270016 }, "timestamp": "2025-09-10 02:21:56.341612", "step": 2853, "epoch": 2 }, { "type": "loss", "content": 0.011419777758419514, "timestamp": "2025-09-10 02:21:56.343838", "step": 2854, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-10 02:21:56.382442", "step": 2854, "epoch": 2 }, { "type": "loss", "content": 0.012729802168905735, "timestamp": "2025-09-10 02:21:56.398256", "step": 2855, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 128 ], "flops": 3797092544000 }, "timestamp": "2025-09-10 02:21:56.429465", "step": 2855, "epoch": 2 }, { "type": "loss", "content": 0.0015720551600679755, "timestamp": "2025-09-10 02:21:56.453669", "step": 2856, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:21:56.484715", "step": 2856, "epoch": 2 }, { "type": "loss", "content": 0.014717082493007183, "timestamp": "2025-09-10 02:21:56.488184", "step": 2857, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:21:56.518013", "step": 2857, "epoch": 2 }, { "type": "loss", "content": 0.003454964840784669, "timestamp": "2025-09-10 02:21:56.525556", "step": 2858, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:21:56.556246", "step": 2858, "epoch": 2 }, { "type": "loss", "content": 0.002767723286524415, "timestamp": "2025-09-10 02:21:56.566504", "step": 2859, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:21:56.596507", "step": 2859, "epoch": 2 }, { "type": "loss", "content": 0.012848809361457825, "timestamp": "2025-09-10 02:21:56.624554", "step": 2860, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:21:56.655309", "step": 2860, "epoch": 2 }, { "type": "loss", "content": 0.015174107626080513, "timestamp": "2025-09-10 02:21:56.663342", "step": 2861, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-10 02:21:56.697483", "step": 2861, "epoch": 2 }, { "type": "loss", "content": 0.006852737162262201, "timestamp": "2025-09-10 02:21:56.711212", "step": 2862, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:21:56.742102", "step": 2862, "epoch": 2 }, { "type": "loss", "content": 0.016556836664676666, "timestamp": "2025-09-10 02:21:56.749989", "step": 2863, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:21:56.780543", "step": 2863, "epoch": 2 }, { "type": "loss", "content": 0.012003665789961815, "timestamp": "2025-09-10 02:21:56.809224", "step": 2864, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:21:56.840636", "step": 2864, "epoch": 2 }, { "type": "loss", "content": 0.0033726885449141264, "timestamp": "2025-09-10 02:21:56.846253", "step": 2865, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:21:56.878932", "step": 2865, "epoch": 2 }, { "type": "loss", "content": 0.001546733663417399, "timestamp": "2025-09-10 02:21:56.891219", "step": 2866, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:21:56.925077", "step": 2866, "epoch": 2 }, { "type": "loss", "content": 0.018309442326426506, "timestamp": "2025-09-10 02:21:56.937635", "step": 2867, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-10 02:21:56.976009", "step": 2867, "epoch": 2 }, { "type": "loss", "content": 0.005003686994314194, "timestamp": "2025-09-10 02:21:57.011500", "step": 2868, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:21:57.050458", "step": 2868, "epoch": 2 }, { "type": "loss", "content": 0.01586066372692585, "timestamp": "2025-09-10 02:21:57.058502", "step": 2869, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:21:57.094025", "step": 2869, "epoch": 2 }, { "type": "loss", "content": 0.022168749943375587, "timestamp": "2025-09-10 02:21:57.101730", "step": 2870, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-10 02:21:57.145405", "step": 2870, "epoch": 2 }, { "type": "loss", "content": 0.011965368874371052, "timestamp": "2025-09-10 02:21:57.161292", "step": 2871, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:21:57.202765", "step": 2871, "epoch": 2 }, { "type": "loss", "content": 0.0080100167542696, "timestamp": "2025-09-10 02:21:57.230608", "step": 2872, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:21:57.269690", "step": 2872, "epoch": 2 }, { "type": "loss", "content": 0.004915738943964243, "timestamp": "2025-09-10 02:21:57.282756", "step": 2873, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:21:57.324127", "step": 2873, "epoch": 2 }, { "type": "loss", "content": 0.036590684205293655, "timestamp": "2025-09-10 02:21:57.337518", "step": 2874, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:21:57.378714", "step": 2874, "epoch": 2 }, { "type": "loss", "content": 0.016932787373661995, "timestamp": "2025-09-10 02:21:57.383194", "step": 2875, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:21:57.421402", "step": 2875, "epoch": 2 }, { "type": "loss", "content": 0.002477414207533002, "timestamp": "2025-09-10 02:21:57.453426", "step": 2876, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:21:57.493622", "step": 2876, "epoch": 2 }, { "type": "loss", "content": 0.009233239106833935, "timestamp": "2025-09-10 02:21:57.497109", "step": 2877, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:21:57.531367", "step": 2877, "epoch": 2 }, { "type": "loss", "content": 0.004387423861771822, "timestamp": "2025-09-10 02:21:57.538838", "step": 2878, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:21:57.576986", "step": 2878, "epoch": 2 }, { "type": "loss", "content": 0.0022044102661311626, "timestamp": "2025-09-10 02:21:57.581193", "step": 2879, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:21:57.620656", "step": 2879, "epoch": 2 }, { "type": "loss", "content": 0.003344293450936675, "timestamp": "2025-09-10 02:21:57.653820", "step": 2880, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:21:57.688571", "step": 2880, "epoch": 2 }, { "type": "loss", "content": 0.0033048386685550213, "timestamp": "2025-09-10 02:21:57.693903", "step": 2881, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:21:57.729127", "step": 2881, "epoch": 2 }, { "type": "loss", "content": 0.013956844806671143, "timestamp": "2025-09-10 02:21:57.733657", "step": 2882, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:21:57.772898", "step": 2882, "epoch": 2 }, { "type": "loss", "content": 0.017580043524503708, "timestamp": "2025-09-10 02:21:57.780655", "step": 2883, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:21:57.823983", "step": 2883, "epoch": 2 }, { "type": "loss", "content": 0.006970468442887068, "timestamp": "2025-09-10 02:21:57.852000", "step": 2884, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:21:57.892963", "step": 2884, "epoch": 2 }, { "type": "loss", "content": 0.002934870542958379, "timestamp": "2025-09-10 02:21:57.901055", "step": 2885, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:21:57.938969", "step": 2885, "epoch": 2 }, { "type": "loss", "content": 0.0011385561665520072, "timestamp": "2025-09-10 02:21:57.946648", "step": 2886, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:21:57.999025", "step": 2886, "epoch": 2 }, { "type": "loss", "content": 0.010981320403516293, "timestamp": "2025-09-10 02:21:58.010261", "step": 2887, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:21:58.064350", "step": 2887, "epoch": 2 }, { "type": "loss", "content": 0.01800290308892727, "timestamp": "2025-09-10 02:21:58.096941", "step": 2888, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:21:58.139621", "step": 2888, "epoch": 2 }, { "type": "loss", "content": 0.00524926045909524, "timestamp": "2025-09-10 02:21:58.147001", "step": 2889, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 480 ], "flops": 14238374516352 }, "timestamp": "2025-09-10 02:21:58.205833", "step": 2889, "epoch": 2 }, { "type": "loss", "content": 0.004338169004768133, "timestamp": "2025-09-10 02:21:58.223203", "step": 2890, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:21:58.268415", "step": 2890, "epoch": 2 }, { "type": "loss", "content": 0.01495091337710619, "timestamp": "2025-09-10 02:21:58.275632", "step": 2891, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:21:58.305991", "step": 2891, "epoch": 2 }, { "type": "loss", "content": 0.0038837611209601164, "timestamp": "2025-09-10 02:21:58.331474", "step": 2892, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 688 ], "flops": 20408222954560 }, "timestamp": "2025-09-10 02:21:58.386481", "step": 2892, "epoch": 2 }, { "type": "loss", "content": 0.001870313542895019, "timestamp": "2025-09-10 02:21:58.410780", "step": 2893, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:21:58.441343", "step": 2893, "epoch": 2 }, { "type": "loss", "content": 0.0013324370374903083, "timestamp": "2025-09-10 02:21:58.444028", "step": 2894, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:21:58.474641", "step": 2894, "epoch": 2 }, { "type": "loss", "content": 0.003104160074144602, "timestamp": "2025-09-10 02:21:58.482522", "step": 2895, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:21:58.514154", "step": 2895, "epoch": 2 }, { "type": "loss", "content": 0.0025289487093687057, "timestamp": "2025-09-10 02:21:58.542545", "step": 2896, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:21:58.573421", "step": 2896, "epoch": 2 }, { "type": "loss", "content": 0.0017517339438199997, "timestamp": "2025-09-10 02:21:58.575491", "step": 2897, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:21:58.606654", "step": 2897, "epoch": 2 }, { "type": "loss", "content": 0.004530445206910372, "timestamp": "2025-09-10 02:21:58.618786", "step": 2898, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:21:58.650642", "step": 2898, "epoch": 2 }, { "type": "loss", "content": 0.0009508281364105642, "timestamp": "2025-09-10 02:21:58.658545", "step": 2899, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:21:58.689706", "step": 2899, "epoch": 2 }, { "type": "loss", "content": 0.012976233847439289, "timestamp": "2025-09-10 02:21:58.717747", "step": 2900, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-10 02:21:58.754243", "step": 2900, "epoch": 2 }, { "type": "loss", "content": 0.002717123832553625, "timestamp": "2025-09-10 02:21:58.769406", "step": 2901, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:21:58.800524", "step": 2901, "epoch": 2 }, { "type": "loss", "content": 0.019545141607522964, "timestamp": "2025-09-10 02:21:58.807986", "step": 2902, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 560 ], "flops": 16611393146432 }, "timestamp": "2025-09-10 02:21:58.854567", "step": 2902, "epoch": 2 }, { "type": "loss", "content": 0.0025809798389673233, "timestamp": "2025-09-10 02:21:58.873979", "step": 2903, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:21:58.910932", "step": 2903, "epoch": 2 }, { "type": "loss", "content": 0.0010793408146128058, "timestamp": "2025-09-10 02:21:58.939062", "step": 2904, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:21:58.969530", "step": 2904, "epoch": 2 }, { "type": "loss", "content": 0.002281660446897149, "timestamp": "2025-09-10 02:21:58.977402", "step": 2905, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:21:59.008417", "step": 2905, "epoch": 2 }, { "type": "loss", "content": 0.008847257122397423, "timestamp": "2025-09-10 02:21:59.015401", "step": 2906, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:21:59.051422", "step": 2906, "epoch": 2 }, { "type": "loss", "content": 0.00565936928614974, "timestamp": "2025-09-10 02:21:59.055983", "step": 2907, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:21:59.094239", "step": 2907, "epoch": 2 }, { "type": "loss", "content": 0.0032904818654060364, "timestamp": "2025-09-10 02:21:59.127238", "step": 2908, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:21:59.158629", "step": 2908, "epoch": 2 }, { "type": "loss", "content": 0.0015532250981777906, "timestamp": "2025-09-10 02:21:59.168322", "step": 2909, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:21:59.200228", "step": 2909, "epoch": 2 }, { "type": "loss", "content": 0.0015883222222328186, "timestamp": "2025-09-10 02:21:59.207380", "step": 2910, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:21:59.238162", "step": 2910, "epoch": 2 }, { "type": "loss", "content": 0.01465687807649374, "timestamp": "2025-09-10 02:21:59.250384", "step": 2911, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:21:59.285448", "step": 2911, "epoch": 2 }, { "type": "loss", "content": 0.000894768163561821, "timestamp": "2025-09-10 02:21:59.310633", "step": 2912, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:21:59.341168", "step": 2912, "epoch": 2 }, { "type": "loss", "content": 0.001928298850543797, "timestamp": "2025-09-10 02:21:59.343617", "step": 2913, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:21:59.375092", "step": 2913, "epoch": 2 }, { "type": "loss", "content": 0.0025657066144049168, "timestamp": "2025-09-10 02:21:59.379746", "step": 2914, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-10 02:21:59.414770", "step": 2914, "epoch": 2 }, { "type": "loss", "content": 0.0007942708325572312, "timestamp": "2025-09-10 02:21:59.428803", "step": 2915, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:21:59.461407", "step": 2915, "epoch": 2 }, { "type": "loss", "content": 0.012047487311065197, "timestamp": "2025-09-10 02:21:59.486808", "step": 2916, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:21:59.527655", "step": 2916, "epoch": 2 }, { "type": "loss", "content": 0.030188219621777534, "timestamp": "2025-09-10 02:21:59.532533", "step": 2917, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:21:59.570354", "step": 2917, "epoch": 2 }, { "type": "loss", "content": 0.02199845388531685, "timestamp": "2025-09-10 02:21:59.577441", "step": 2918, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:21:59.614234", "step": 2918, "epoch": 2 }, { "type": "loss", "content": 0.010863765142858028, "timestamp": "2025-09-10 02:21:59.625220", "step": 2919, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-10 02:21:59.669965", "step": 2919, "epoch": 2 }, { "type": "loss", "content": 0.0012042339658364654, "timestamp": "2025-09-10 02:21:59.704651", "step": 2920, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:21:59.735315", "step": 2920, "epoch": 2 }, { "type": "loss", "content": 0.04105643555521965, "timestamp": "2025-09-10 02:21:59.737634", "step": 2921, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:21:59.767694", "step": 2921, "epoch": 2 }, { "type": "loss", "content": 0.00044288174831308424, "timestamp": "2025-09-10 02:21:59.770211", "step": 2922, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:21:59.800706", "step": 2922, "epoch": 2 }, { "type": "loss", "content": 0.006732792127877474, "timestamp": "2025-09-10 02:21:59.811614", "step": 2923, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:21:59.841546", "step": 2923, "epoch": 2 }, { "type": "loss", "content": 0.0017994015943259, "timestamp": "2025-09-10 02:21:59.869331", "step": 2924, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:21:59.900936", "step": 2924, "epoch": 2 }, { "type": "loss", "content": 0.0027029775083065033, "timestamp": "2025-09-10 02:21:59.910460", "step": 2925, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:21:59.956229", "step": 2925, "epoch": 2 }, { "type": "loss", "content": 0.005239599384367466, "timestamp": "2025-09-10 02:21:59.960170", "step": 2926, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:21:59.994080", "step": 2926, "epoch": 2 }, { "type": "loss", "content": 0.006697875447571278, "timestamp": "2025-09-10 02:22:00.004375", "step": 2927, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:22:00.038069", "step": 2927, "epoch": 2 }, { "type": "loss", "content": 0.03148489445447922, "timestamp": "2025-09-10 02:22:00.063041", "step": 2928, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:22:00.094406", "step": 2928, "epoch": 2 }, { "type": "loss", "content": 0.020124191418290138, "timestamp": "2025-09-10 02:22:00.099254", "step": 2929, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 464 ], "flops": 13763770790336 }, "timestamp": "2025-09-10 02:22:00.140652", "step": 2929, "epoch": 2 }, { "type": "loss", "content": 0.010933955200016499, "timestamp": "2025-09-10 02:22:00.157710", "step": 2930, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:22:00.190255", "step": 2930, "epoch": 2 }, { "type": "loss", "content": 0.013913453556597233, "timestamp": "2025-09-10 02:22:00.202665", "step": 2931, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:22:00.234784", "step": 2931, "epoch": 2 }, { "type": "loss", "content": 0.003610015381127596, "timestamp": "2025-09-10 02:22:00.262635", "step": 2932, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:22:00.293763", "step": 2932, "epoch": 2 }, { "type": "loss", "content": 0.001940641668625176, "timestamp": "2025-09-10 02:22:00.298029", "step": 2933, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:22:00.330128", "step": 2933, "epoch": 2 }, { "type": "loss", "content": 0.0037323671858757734, "timestamp": "2025-09-10 02:22:00.340931", "step": 2934, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:22:00.374918", "step": 2934, "epoch": 2 }, { "type": "loss", "content": 0.01665830798447132, "timestamp": "2025-09-10 02:22:00.388319", "step": 2935, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:22:00.419887", "step": 2935, "epoch": 2 }, { "type": "loss", "content": 0.005614429712295532, "timestamp": "2025-09-10 02:22:00.447847", "step": 2936, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:22:00.479376", "step": 2936, "epoch": 2 }, { "type": "loss", "content": 0.0027582976035773754, "timestamp": "2025-09-10 02:22:00.487368", "step": 2937, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:22:00.518110", "step": 2937, "epoch": 2 }, { "type": "loss", "content": 0.003742832690477371, "timestamp": "2025-09-10 02:22:00.525908", "step": 2938, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:22:00.556958", "step": 2938, "epoch": 2 }, { "type": "loss", "content": 0.039867255836725235, "timestamp": "2025-09-10 02:22:00.561533", "step": 2939, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:22:00.593247", "step": 2939, "epoch": 2 }, { "type": "loss", "content": 0.0062867277301847935, "timestamp": "2025-09-10 02:22:00.618271", "step": 2940, "epoch": 2 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 736 ], "batch_size": 8, "flops": 14554433988352 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 480 ], "batch_size": 8, "flops": 9492022189824 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 544 ], "batch_size": 8, "flops": 10757625139456 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 624 ], "batch_size": 8, "flops": 12339628826496 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 576 ], "batch_size": 8, "flops": 11390426614272 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 432 ], "batch_size": 8, "flops": 8542819977600 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 432 ], "batch_size": 8, "flops": 8542819977600 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 128 ], "batch_size": 8, "flops": 2531205966848 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 544 ], "batch_size": 8, "flops": 10757625139456 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 656 ], "batch_size": 8, "flops": 12972430301312 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 128 ], "batch_size": 8, "flops": 2531205966848 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 464 ], "batch_size": 8, "flops": 9175621452416 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 560 ], "batch_size": 8, "flops": 11074025876864 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 576 ], "batch_size": 8, "flops": 11390426614272 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 1168 ], "batch_size": 8, "flops": 23097253898368 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 640 ], "batch_size": 8, "flops": 12656029563904 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 6, 208 ], "batch_size": 8, "flops": 4113209653888 } ], "timestamp": "2025-09-10 02:22:10.925419", "step": 2940, "epoch": 2 }, { "type": "pplx", "content": 20450711.8035112, "timestamp": "2025-09-10 02:22:10.928876", "step": 2940, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:22:10.958777", "step": 2940, "epoch": 2 }, { "type": "loss", "content": 0.00988749973475933, "timestamp": "2025-09-10 02:22:10.967420", "step": 2941, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:22:10.999190", "step": 2941, "epoch": 2 }, { "type": "loss", "content": 0.0008435306954197586, "timestamp": "2025-09-10 02:22:11.006091", "step": 2942, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:22:11.036991", "step": 2942, "epoch": 2 }, { "type": "loss", "content": 0.0046732001937925816, "timestamp": "2025-09-10 02:22:11.047700", "step": 2943, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:22:11.079822", "step": 2943, "epoch": 2 }, { "type": "loss", "content": 0.01026154775172472, "timestamp": "2025-09-10 02:22:11.107510", "step": 2944, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:22:11.138368", "step": 2944, "epoch": 2 }, { "type": "loss", "content": 0.0024215218145400286, "timestamp": "2025-09-10 02:22:11.143757", "step": 2945, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 816 ], "flops": 24205052762688 }, "timestamp": "2025-09-10 02:22:11.212348", "step": 2945, "epoch": 2 }, { "type": "loss", "content": 0.000555099977646023, "timestamp": "2025-09-10 02:22:11.240841", "step": 2946, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:22:11.270823", "step": 2946, "epoch": 2 }, { "type": "loss", "content": 0.003919025417417288, "timestamp": "2025-09-10 02:22:11.278728", "step": 2947, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:22:11.309682", "step": 2947, "epoch": 2 }, { "type": "loss", "content": 0.0007654453511349857, "timestamp": "2025-09-10 02:22:11.340662", "step": 2948, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:22:11.373858", "step": 2948, "epoch": 2 }, { "type": "loss", "content": 0.007311842869967222, "timestamp": "2025-09-10 02:22:11.383725", "step": 2949, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:22:11.414295", "step": 2949, "epoch": 2 }, { "type": "loss", "content": 0.0015561177860945463, "timestamp": "2025-09-10 02:22:11.418689", "step": 2950, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:22:11.449620", "step": 2950, "epoch": 2 }, { "type": "loss", "content": 0.003131111618131399, "timestamp": "2025-09-10 02:22:11.457383", "step": 2951, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:22:11.487810", "step": 2951, "epoch": 2 }, { "type": "loss", "content": 0.004036908969283104, "timestamp": "2025-09-10 02:22:11.513200", "step": 2952, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-10 02:22:11.546861", "step": 2952, "epoch": 2 }, { "type": "loss", "content": 0.003508640918880701, "timestamp": "2025-09-10 02:22:11.560205", "step": 2953, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:22:11.590549", "step": 2953, "epoch": 2 }, { "type": "loss", "content": 0.005527927540242672, "timestamp": "2025-09-10 02:22:11.597845", "step": 2954, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:22:11.630810", "step": 2954, "epoch": 2 }, { "type": "loss", "content": 0.013634276576340199, "timestamp": "2025-09-10 02:22:11.638282", "step": 2955, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:22:11.669259", "step": 2955, "epoch": 2 }, { "type": "loss", "content": 0.004053633194416761, "timestamp": "2025-09-10 02:22:11.697820", "step": 2956, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:22:11.728460", "step": 2956, "epoch": 2 }, { "type": "loss", "content": 0.004635666497051716, "timestamp": "2025-09-10 02:22:11.733519", "step": 2957, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:22:11.763929", "step": 2957, "epoch": 2 }, { "type": "loss", "content": 0.001686741947196424, "timestamp": "2025-09-10 02:22:11.770938", "step": 2958, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:22:11.802254", "step": 2958, "epoch": 2 }, { "type": "loss", "content": 0.013765445910394192, "timestamp": "2025-09-10 02:22:11.809208", "step": 2959, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:22:11.839864", "step": 2959, "epoch": 2 }, { "type": "loss", "content": 0.0005294339498504996, "timestamp": "2025-09-10 02:22:11.872901", "step": 2960, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:22:11.904098", "step": 2960, "epoch": 2 }, { "type": "loss", "content": 0.012235159985721111, "timestamp": "2025-09-10 02:22:11.914672", "step": 2961, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-10 02:22:11.951422", "step": 2961, "epoch": 2 }, { "type": "loss", "content": 0.004377015866339207, "timestamp": "2025-09-10 02:22:11.965222", "step": 2962, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:22:11.996458", "step": 2962, "epoch": 2 }, { "type": "loss", "content": 0.0006293201586231589, "timestamp": "2025-09-10 02:22:12.003260", "step": 2963, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:22:12.036249", "step": 2963, "epoch": 2 }, { "type": "loss", "content": 0.0035972814075648785, "timestamp": "2025-09-10 02:22:12.068062", "step": 2964, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:22:12.107853", "step": 2964, "epoch": 2 }, { "type": "loss", "content": 8.064762369031087e-05, "timestamp": "2025-09-10 02:22:12.110062", "step": 2965, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:22:12.149792", "step": 2965, "epoch": 2 }, { "type": "loss", "content": 0.0006316312937997282, "timestamp": "2025-09-10 02:22:12.157021", "step": 2966, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:22:12.189130", "step": 2966, "epoch": 2 }, { "type": "loss", "content": 0.0010401438921689987, "timestamp": "2025-09-10 02:22:12.196795", "step": 2967, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:22:12.235983", "step": 2967, "epoch": 2 }, { "type": "loss", "content": 0.016096774488687515, "timestamp": "2025-09-10 02:22:12.261540", "step": 2968, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:22:12.295464", "step": 2968, "epoch": 2 }, { "type": "loss", "content": 0.002507053781300783, "timestamp": "2025-09-10 02:22:12.303329", "step": 2969, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:22:12.337886", "step": 2969, "epoch": 2 }, { "type": "loss", "content": 0.0009550213580951095, "timestamp": "2025-09-10 02:22:12.351270", "step": 2970, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:22:12.382407", "step": 2970, "epoch": 2 }, { "type": "loss", "content": 0.00042923627188429236, "timestamp": "2025-09-10 02:22:12.394518", "step": 2971, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:22:12.426545", "step": 2971, "epoch": 2 }, { "type": "loss", "content": 0.001259053940884769, "timestamp": "2025-09-10 02:22:12.451335", "step": 2972, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:22:12.484720", "step": 2972, "epoch": 2 }, { "type": "loss", "content": 0.0048403749242424965, "timestamp": "2025-09-10 02:22:12.497725", "step": 2973, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:22:12.531127", "step": 2973, "epoch": 2 }, { "type": "loss", "content": 0.005875090602785349, "timestamp": "2025-09-10 02:22:12.538186", "step": 2974, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:22:12.570319", "step": 2974, "epoch": 2 }, { "type": "loss", "content": 0.00015017333498690277, "timestamp": "2025-09-10 02:22:12.577711", "step": 2975, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:22:12.608608", "step": 2975, "epoch": 2 }, { "type": "loss", "content": 0.01487821340560913, "timestamp": "2025-09-10 02:22:12.637276", "step": 2976, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:22:12.670601", "step": 2976, "epoch": 2 }, { "type": "loss", "content": 0.0001285710313823074, "timestamp": "2025-09-10 02:22:12.677586", "step": 2977, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:22:12.709280", "step": 2977, "epoch": 2 }, { "type": "loss", "content": 0.004458011593669653, "timestamp": "2025-09-10 02:22:12.720274", "step": 2978, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:22:12.755600", "step": 2978, "epoch": 2 }, { "type": "loss", "content": 0.0002914820215664804, "timestamp": "2025-09-10 02:22:12.760278", "step": 2979, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:22:12.791212", "step": 2979, "epoch": 2 }, { "type": "loss", "content": 0.000645567080937326, "timestamp": "2025-09-10 02:22:12.819769", "step": 2980, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 128 ], "flops": 3797092544000 }, "timestamp": "2025-09-10 02:22:12.853166", "step": 2980, "epoch": 2 }, { "type": "loss", "content": 0.0005663609481416643, "timestamp": "2025-09-10 02:22:12.855008", "step": 2981, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:22:12.885874", "step": 2981, "epoch": 2 }, { "type": "loss", "content": 0.03028297796845436, "timestamp": "2025-09-10 02:22:12.892771", "step": 2982, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:22:12.922939", "step": 2982, "epoch": 2 }, { "type": "loss", "content": 0.0005917864036746323, "timestamp": "2025-09-10 02:22:12.926940", "step": 2983, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:22:12.959502", "step": 2983, "epoch": 2 }, { "type": "loss", "content": 0.001026555197313428, "timestamp": "2025-09-10 02:22:12.988143", "step": 2984, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:22:13.028026", "step": 2984, "epoch": 2 }, { "type": "loss", "content": 0.0009340193355455995, "timestamp": "2025-09-10 02:22:13.035431", "step": 2985, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:22:13.072083", "step": 2985, "epoch": 2 }, { "type": "loss", "content": 0.0008202405297197402, "timestamp": "2025-09-10 02:22:13.079195", "step": 2986, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:22:13.116266", "step": 2986, "epoch": 2 }, { "type": "loss", "content": 0.0028560981154441833, "timestamp": "2025-09-10 02:22:13.128338", "step": 2987, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:22:13.169528", "step": 2987, "epoch": 2 }, { "type": "loss", "content": 0.00015045542386360466, "timestamp": "2025-09-10 02:22:13.197543", "step": 2988, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:22:13.229385", "step": 2988, "epoch": 2 }, { "type": "loss", "content": 0.0005772449658252299, "timestamp": "2025-09-10 02:22:13.236140", "step": 2989, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:22:13.269514", "step": 2989, "epoch": 2 }, { "type": "loss", "content": 0.0008060900145210326, "timestamp": "2025-09-10 02:22:13.277134", "step": 2990, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:22:13.311834", "step": 2990, "epoch": 2 }, { "type": "loss", "content": 0.006533232517540455, "timestamp": "2025-09-10 02:22:13.319297", "step": 2991, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:22:13.354743", "step": 2991, "epoch": 2 }, { "type": "loss", "content": 0.0189223550260067, "timestamp": "2025-09-10 02:22:13.382512", "step": 2992, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:22:13.414356", "step": 2992, "epoch": 2 }, { "type": "loss", "content": 0.0038932212628424168, "timestamp": "2025-09-10 02:22:13.424102", "step": 2993, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:22:13.464029", "step": 2993, "epoch": 2 }, { "type": "loss", "content": 0.0023500225506722927, "timestamp": "2025-09-10 02:22:13.471545", "step": 2994, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:22:13.519871", "step": 2994, "epoch": 2 }, { "type": "loss", "content": 0.029731089249253273, "timestamp": "2025-09-10 02:22:13.522461", "step": 2995, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:22:13.556026", "step": 2995, "epoch": 2 }, { "type": "loss", "content": 0.00560992443934083, "timestamp": "2025-09-10 02:22:13.583643", "step": 2996, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:22:13.614809", "step": 2996, "epoch": 2 }, { "type": "loss", "content": 0.0013980664080008864, "timestamp": "2025-09-10 02:22:13.625149", "step": 2997, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:22:13.657053", "step": 2997, "epoch": 2 }, { "type": "loss", "content": 0.007363726384937763, "timestamp": "2025-09-10 02:22:13.669642", "step": 2998, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:22:13.704091", "step": 2998, "epoch": 2 }, { "type": "loss", "content": 0.005370273254811764, "timestamp": "2025-09-10 02:22:13.711107", "step": 2999, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:22:13.743172", "step": 2999, "epoch": 2 }, { "type": "loss", "content": 0.008254798129200935, "timestamp": "2025-09-10 02:22:13.771504", "step": 3000, "epoch": 2 }, { "type": "info", "content": "Checkpoint saved at step 3000", "timestamp": "2025-09-10 02:22:18.507756", "step": 3000, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:22:18.561073", "step": 3000, "epoch": 2 }, { "type": "loss", "content": 0.013195289298892021, "timestamp": "2025-09-10 02:22:18.568642", "step": 3001, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:22:18.605389", "step": 3001, "epoch": 2 }, { "type": "loss", "content": 0.00990669522434473, "timestamp": "2025-09-10 02:22:18.609221", "step": 3002, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:22:18.644904", "step": 3002, "epoch": 2 }, { "type": "loss", "content": 0.004560007713735104, "timestamp": "2025-09-10 02:22:18.649210", "step": 3003, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:22:18.687567", "step": 3003, "epoch": 2 }, { "type": "loss", "content": 0.0024112870451062918, "timestamp": "2025-09-10 02:22:18.713265", "step": 3004, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:22:18.749859", "step": 3004, "epoch": 2 }, { "type": "loss", "content": 0.0007551188464276493, "timestamp": "2025-09-10 02:22:18.754752", "step": 3005, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:22:18.786814", "step": 3005, "epoch": 2 }, { "type": "loss", "content": 0.0002144659374607727, "timestamp": "2025-09-10 02:22:18.793390", "step": 3006, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-10 02:22:18.831745", "step": 3006, "epoch": 2 }, { "type": "loss", "content": 0.0013251977507025003, "timestamp": "2025-09-10 02:22:18.847306", "step": 3007, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:22:18.879089", "step": 3007, "epoch": 2 }, { "type": "loss", "content": 0.00027837217203341424, "timestamp": "2025-09-10 02:22:18.906511", "step": 3008, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:22:18.939130", "step": 3008, "epoch": 2 }, { "type": "loss", "content": 0.0012016237014904618, "timestamp": "2025-09-10 02:22:18.944273", "step": 3009, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-10 02:22:18.982271", "step": 3009, "epoch": 2 }, { "type": "loss", "content": 0.016353409737348557, "timestamp": "2025-09-10 02:22:18.996050", "step": 3010, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:22:19.034920", "step": 3010, "epoch": 2 }, { "type": "loss", "content": 0.025526031851768494, "timestamp": "2025-09-10 02:22:19.041451", "step": 3011, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:22:19.077655", "step": 3011, "epoch": 2 }, { "type": "loss", "content": 0.007432910148054361, "timestamp": "2025-09-10 02:22:19.108392", "step": 3012, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:22:19.146844", "step": 3012, "epoch": 2 }, { "type": "loss", "content": 0.0007051877328194678, "timestamp": "2025-09-10 02:22:19.155911", "step": 3013, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:22:19.193294", "step": 3013, "epoch": 2 }, { "type": "loss", "content": 0.010938274674117565, "timestamp": "2025-09-10 02:22:19.205424", "step": 3014, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:22:19.241698", "step": 3014, "epoch": 2 }, { "type": "loss", "content": 0.03111579827964306, "timestamp": "2025-09-10 02:22:19.248729", "step": 3015, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:22:19.280541", "step": 3015, "epoch": 2 }, { "type": "loss", "content": 0.008617566898465157, "timestamp": "2025-09-10 02:22:19.308351", "step": 3016, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:22:19.341108", "step": 3016, "epoch": 2 }, { "type": "loss", "content": 0.014785193838179111, "timestamp": "2025-09-10 02:22:19.346261", "step": 3017, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:22:19.379772", "step": 3017, "epoch": 2 }, { "type": "loss", "content": 0.002250525401905179, "timestamp": "2025-09-10 02:22:19.386866", "step": 3018, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:22:19.418987", "step": 3018, "epoch": 2 }, { "type": "loss", "content": 0.0003496368881314993, "timestamp": "2025-09-10 02:22:19.426212", "step": 3019, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:22:19.457212", "step": 3019, "epoch": 2 }, { "type": "loss", "content": 0.0003628613776527345, "timestamp": "2025-09-10 02:22:19.485625", "step": 3020, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:22:19.521575", "step": 3020, "epoch": 2 }, { "type": "loss", "content": 0.0066203526221215725, "timestamp": "2025-09-10 02:22:19.530594", "step": 3021, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:22:19.563427", "step": 3021, "epoch": 2 }, { "type": "loss", "content": 0.00015238435298670083, "timestamp": "2025-09-10 02:22:19.570361", "step": 3022, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:22:19.601647", "step": 3022, "epoch": 2 }, { "type": "loss", "content": 0.003497667144984007, "timestamp": "2025-09-10 02:22:19.608232", "step": 3023, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:22:19.639837", "step": 3023, "epoch": 2 }, { "type": "loss", "content": 0.04256868362426758, "timestamp": "2025-09-10 02:22:19.671190", "step": 3024, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:22:19.703218", "step": 3024, "epoch": 2 }, { "type": "loss", "content": 0.0011781870853155851, "timestamp": "2025-09-10 02:22:19.705106", "step": 3025, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:22:19.736844", "step": 3025, "epoch": 2 }, { "type": "loss", "content": 0.00529795978218317, "timestamp": "2025-09-10 02:22:19.743694", "step": 3026, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:22:19.774776", "step": 3026, "epoch": 2 }, { "type": "loss", "content": 0.011905157007277012, "timestamp": "2025-09-10 02:22:19.782323", "step": 3027, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:22:19.814092", "step": 3027, "epoch": 2 }, { "type": "loss", "content": 0.0007259399862959981, "timestamp": "2025-09-10 02:22:19.842214", "step": 3028, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:22:19.873893", "step": 3028, "epoch": 2 }, { "type": "loss", "content": 0.00030881358543410897, "timestamp": "2025-09-10 02:22:19.881354", "step": 3029, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:22:19.913353", "step": 3029, "epoch": 2 }, { "type": "loss", "content": 0.02642730250954628, "timestamp": "2025-09-10 02:22:19.920234", "step": 3030, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 432 ], "flops": 12814563338304 }, "timestamp": "2025-09-10 02:22:19.961020", "step": 3030, "epoch": 2 }, { "type": "loss", "content": 0.062217261642217636, "timestamp": "2025-09-10 02:22:19.977252", "step": 3031, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:22:20.010741", "step": 3031, "epoch": 2 }, { "type": "loss", "content": 0.010299092158675194, "timestamp": "2025-09-10 02:22:20.038290", "step": 3032, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:22:20.069983", "step": 3032, "epoch": 2 }, { "type": "loss", "content": 0.0013357808347791433, "timestamp": "2025-09-10 02:22:20.072354", "step": 3033, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:22:20.104344", "step": 3033, "epoch": 2 }, { "type": "loss", "content": 0.00296528497710824, "timestamp": "2025-09-10 02:22:20.111120", "step": 3034, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:22:20.142696", "step": 3034, "epoch": 2 }, { "type": "loss", "content": 0.037451110780239105, "timestamp": "2025-09-10 02:22:20.149596", "step": 3035, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:22:20.182290", "step": 3035, "epoch": 2 }, { "type": "loss", "content": 0.0031133827287703753, "timestamp": "2025-09-10 02:22:20.210302", "step": 3036, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:22:20.242675", "step": 3036, "epoch": 2 }, { "type": "loss", "content": 0.0067618959583342075, "timestamp": "2025-09-10 02:22:20.255690", "step": 3037, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:22:20.287885", "step": 3037, "epoch": 2 }, { "type": "loss", "content": 0.0028574629686772823, "timestamp": "2025-09-10 02:22:20.291785", "step": 3038, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:22:20.322595", "step": 3038, "epoch": 2 }, { "type": "loss", "content": 0.0004291358927730471, "timestamp": "2025-09-10 02:22:20.329540", "step": 3039, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:22:20.360367", "step": 3039, "epoch": 2 }, { "type": "loss", "content": 0.04179126024246216, "timestamp": "2025-09-10 02:22:20.388995", "step": 3040, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:22:20.419723", "step": 3040, "epoch": 2 }, { "type": "loss", "content": 0.003463194938376546, "timestamp": "2025-09-10 02:22:20.424416", "step": 3041, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-10 02:22:20.462606", "step": 3041, "epoch": 2 }, { "type": "loss", "content": 0.04269900918006897, "timestamp": "2025-09-10 02:22:20.478289", "step": 3042, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:22:20.510403", "step": 3042, "epoch": 2 }, { "type": "loss", "content": 0.008046741597354412, "timestamp": "2025-09-10 02:22:20.517900", "step": 3043, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:22:20.548962", "step": 3043, "epoch": 2 }, { "type": "loss", "content": 0.00842567440122366, "timestamp": "2025-09-10 02:22:20.576760", "step": 3044, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:22:20.608236", "step": 3044, "epoch": 2 }, { "type": "loss", "content": 0.003832954214885831, "timestamp": "2025-09-10 02:22:20.612900", "step": 3045, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:22:20.643437", "step": 3045, "epoch": 2 }, { "type": "loss", "content": 0.0032644220627844334, "timestamp": "2025-09-10 02:22:20.650686", "step": 3046, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:22:20.682397", "step": 3046, "epoch": 2 }, { "type": "loss", "content": 0.000450856052339077, "timestamp": "2025-09-10 02:22:20.694639", "step": 3047, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:22:20.725556", "step": 3047, "epoch": 2 }, { "type": "loss", "content": 0.005584734957665205, "timestamp": "2025-09-10 02:22:20.753421", "step": 3048, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:22:20.784286", "step": 3048, "epoch": 2 }, { "type": "loss", "content": 0.001507714157924056, "timestamp": "2025-09-10 02:22:20.788909", "step": 3049, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:22:20.819348", "step": 3049, "epoch": 2 }, { "type": "loss", "content": 0.006397690158337355, "timestamp": "2025-09-10 02:22:20.826389", "step": 3050, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:22:20.858993", "step": 3050, "epoch": 2 }, { "type": "loss", "content": 0.0005857815849594772, "timestamp": "2025-09-10 02:22:20.866794", "step": 3051, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:22:20.897758", "step": 3051, "epoch": 2 }, { "type": "loss", "content": 0.0018613949650898576, "timestamp": "2025-09-10 02:22:20.925941", "step": 3052, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 432 ], "flops": 12814563338304 }, "timestamp": "2025-09-10 02:22:20.963332", "step": 3052, "epoch": 2 }, { "type": "loss", "content": 0.009978823363780975, "timestamp": "2025-09-10 02:22:20.979018", "step": 3053, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:22:21.010775", "step": 3053, "epoch": 2 }, { "type": "loss", "content": 0.00379360793158412, "timestamp": "2025-09-10 02:22:21.021623", "step": 3054, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:22:21.053164", "step": 3054, "epoch": 2 }, { "type": "loss", "content": 0.0018329472513869405, "timestamp": "2025-09-10 02:22:21.060677", "step": 3055, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:22:21.091584", "step": 3055, "epoch": 2 }, { "type": "loss", "content": 0.014588729478418827, "timestamp": "2025-09-10 02:22:21.119910", "step": 3056, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:22:21.150206", "step": 3056, "epoch": 2 }, { "type": "loss", "content": 0.032337453216314316, "timestamp": "2025-09-10 02:22:21.154756", "step": 3057, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:22:21.187083", "step": 3057, "epoch": 2 }, { "type": "loss", "content": 0.0008836713968776166, "timestamp": "2025-09-10 02:22:21.199654", "step": 3058, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:22:21.231554", "step": 3058, "epoch": 2 }, { "type": "loss", "content": 0.0018349305028095841, "timestamp": "2025-09-10 02:22:21.238523", "step": 3059, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:22:21.271396", "step": 3059, "epoch": 2 }, { "type": "loss", "content": 0.006054178345948458, "timestamp": "2025-09-10 02:22:21.303173", "step": 3060, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:22:21.334981", "step": 3060, "epoch": 2 }, { "type": "loss", "content": 0.019653644412755966, "timestamp": "2025-09-10 02:22:21.340260", "step": 3061, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:22:21.372363", "step": 3061, "epoch": 2 }, { "type": "loss", "content": 0.006816718727350235, "timestamp": "2025-09-10 02:22:21.384960", "step": 3062, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:22:21.415562", "step": 3062, "epoch": 2 }, { "type": "loss", "content": 0.0038755948189646006, "timestamp": "2025-09-10 02:22:21.420127", "step": 3063, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:22:21.450497", "step": 3063, "epoch": 2 }, { "type": "loss", "content": 0.0013827037764713168, "timestamp": "2025-09-10 02:22:21.483511", "step": 3064, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:22:21.518804", "step": 3064, "epoch": 2 }, { "type": "loss", "content": 0.010334816761314869, "timestamp": "2025-09-10 02:22:21.531426", "step": 3065, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:22:21.564761", "step": 3065, "epoch": 2 }, { "type": "loss", "content": 0.011374552734196186, "timestamp": "2025-09-10 02:22:21.572062", "step": 3066, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:22:21.605520", "step": 3066, "epoch": 2 }, { "type": "loss", "content": 0.006082989741116762, "timestamp": "2025-09-10 02:22:21.609560", "step": 3067, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:22:21.645832", "step": 3067, "epoch": 2 }, { "type": "loss", "content": 0.014264583587646484, "timestamp": "2025-09-10 02:22:21.680079", "step": 3068, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 144 ], "flops": 4271696270016 }, "timestamp": "2025-09-10 02:22:21.710880", "step": 3068, "epoch": 2 }, { "type": "loss", "content": 0.007734424900263548, "timestamp": "2025-09-10 02:22:21.713191", "step": 3069, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:22:21.744035", "step": 3069, "epoch": 2 }, { "type": "loss", "content": 0.017366407439112663, "timestamp": "2025-09-10 02:22:21.754021", "step": 3070, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:22:21.784755", "step": 3070, "epoch": 2 }, { "type": "loss", "content": 0.003733862191438675, "timestamp": "2025-09-10 02:22:21.791537", "step": 3071, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:22:21.822650", "step": 3071, "epoch": 2 }, { "type": "loss", "content": 0.0025246471632272005, "timestamp": "2025-09-10 02:22:21.850485", "step": 3072, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:22:21.881411", "step": 3072, "epoch": 2 }, { "type": "loss", "content": 0.013509529642760754, "timestamp": "2025-09-10 02:22:21.889415", "step": 3073, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:22:21.924665", "step": 3073, "epoch": 2 }, { "type": "loss", "content": 0.00036960511351935565, "timestamp": "2025-09-10 02:22:21.938376", "step": 3074, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:22:21.973789", "step": 3074, "epoch": 2 }, { "type": "loss", "content": 0.0023911669850349426, "timestamp": "2025-09-10 02:22:21.984903", "step": 3075, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-10 02:22:22.039465", "step": 3075, "epoch": 2 }, { "type": "loss", "content": 0.0024533343967050314, "timestamp": "2025-09-10 02:22:22.076033", "step": 3076, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:22:22.113073", "step": 3076, "epoch": 2 }, { "type": "loss", "content": 0.0010715676471590996, "timestamp": "2025-09-10 02:22:22.121327", "step": 3077, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:22:22.154654", "step": 3077, "epoch": 2 }, { "type": "loss", "content": 0.0028496759478002787, "timestamp": "2025-09-10 02:22:22.161892", "step": 3078, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:22:22.200186", "step": 3078, "epoch": 2 }, { "type": "loss", "content": 0.009175264276564121, "timestamp": "2025-09-10 02:22:22.211068", "step": 3079, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:22:22.250256", "step": 3079, "epoch": 2 }, { "type": "loss", "content": 0.0040994067676365376, "timestamp": "2025-09-10 02:22:22.278185", "step": 3080, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:22:22.309139", "step": 3080, "epoch": 2 }, { "type": "loss", "content": 0.005873729009181261, "timestamp": "2025-09-10 02:22:22.313893", "step": 3081, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:22:22.345022", "step": 3081, "epoch": 2 }, { "type": "loss", "content": 0.004864770919084549, "timestamp": "2025-09-10 02:22:22.349484", "step": 3082, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:22:22.380778", "step": 3082, "epoch": 2 }, { "type": "loss", "content": 0.0015795464860275388, "timestamp": "2025-09-10 02:22:22.384862", "step": 3083, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:22:22.417242", "step": 3083, "epoch": 2 }, { "type": "loss", "content": 0.0015763568226248026, "timestamp": "2025-09-10 02:22:22.442602", "step": 3084, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:22:22.474120", "step": 3084, "epoch": 2 }, { "type": "loss", "content": 0.003129825461655855, "timestamp": "2025-09-10 02:22:22.476517", "step": 3085, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-10 02:22:22.514847", "step": 3085, "epoch": 2 }, { "type": "loss", "content": 0.0005250798421911895, "timestamp": "2025-09-10 02:22:22.530777", "step": 3086, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:22:22.561937", "step": 3086, "epoch": 2 }, { "type": "loss", "content": 0.003833092050626874, "timestamp": "2025-09-10 02:22:22.568814", "step": 3087, "epoch": 2 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 736 ], "batch_size": 8, "flops": 14554433988352 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 480 ], "batch_size": 8, "flops": 9492022189824 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 544 ], "batch_size": 8, "flops": 10757625139456 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 624 ], "batch_size": 8, "flops": 12339628826496 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 576 ], "batch_size": 8, "flops": 11390426614272 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 432 ], "batch_size": 8, "flops": 8542819977600 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 432 ], "batch_size": 8, "flops": 8542819977600 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 128 ], "batch_size": 8, "flops": 2531205966848 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 544 ], "batch_size": 8, "flops": 10757625139456 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 656 ], "batch_size": 8, "flops": 12972430301312 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 128 ], "batch_size": 8, "flops": 2531205966848 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 464 ], "batch_size": 8, "flops": 9175621452416 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 560 ], "batch_size": 8, "flops": 11074025876864 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 576 ], "batch_size": 8, "flops": 11390426614272 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 1168 ], "batch_size": 8, "flops": 23097253898368 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 640 ], "batch_size": 8, "flops": 12656029563904 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 6, 208 ], "batch_size": 8, "flops": 4113209653888 } ], "timestamp": "2025-09-10 02:22:33.022514", "step": 3087, "epoch": 2 }, { "type": "pplx", "content": 21153755.598216124, "timestamp": "2025-09-10 02:22:33.029278", "step": 3087, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:22:33.064348", "step": 3087, "epoch": 2 }, { "type": "loss", "content": 0.0009679818176664412, "timestamp": "2025-09-10 02:22:33.096215", "step": 3088, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:22:33.136391", "step": 3088, "epoch": 2 }, { "type": "loss", "content": 0.006355203688144684, "timestamp": "2025-09-10 02:22:33.141075", "step": 3089, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:22:33.179377", "step": 3089, "epoch": 2 }, { "type": "loss", "content": 0.0007879887707531452, "timestamp": "2025-09-10 02:22:33.189546", "step": 3090, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:22:33.227835", "step": 3090, "epoch": 2 }, { "type": "loss", "content": 0.011465544812381268, "timestamp": "2025-09-10 02:22:33.233963", "step": 3091, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:22:33.267909", "step": 3091, "epoch": 2 }, { "type": "loss", "content": 0.022248754277825356, "timestamp": "2025-09-10 02:22:33.295620", "step": 3092, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:22:33.328598", "step": 3092, "epoch": 2 }, { "type": "loss", "content": 0.025890696793794632, "timestamp": "2025-09-10 02:22:33.340896", "step": 3093, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-10 02:22:33.378892", "step": 3093, "epoch": 2 }, { "type": "loss", "content": 0.001566907623782754, "timestamp": "2025-09-10 02:22:33.394508", "step": 3094, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:22:33.427731", "step": 3094, "epoch": 2 }, { "type": "loss", "content": 0.0018820820841938257, "timestamp": "2025-09-10 02:22:33.435036", "step": 3095, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:22:33.466008", "step": 3095, "epoch": 2 }, { "type": "loss", "content": 0.0020010853186249733, "timestamp": "2025-09-10 02:22:33.494127", "step": 3096, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 144 ], "flops": 4271696270016 }, "timestamp": "2025-09-10 02:22:33.525651", "step": 3096, "epoch": 2 }, { "type": "loss", "content": 0.0019767414778470993, "timestamp": "2025-09-10 02:22:33.528109", "step": 3097, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:22:33.562476", "step": 3097, "epoch": 2 }, { "type": "loss", "content": 0.007151364348828793, "timestamp": "2025-09-10 02:22:33.566486", "step": 3098, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:22:33.599120", "step": 3098, "epoch": 2 }, { "type": "loss", "content": 0.0023438245989382267, "timestamp": "2025-09-10 02:22:33.605795", "step": 3099, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:22:33.637566", "step": 3099, "epoch": 2 }, { "type": "loss", "content": 0.0004773031105287373, "timestamp": "2025-09-10 02:22:33.662185", "step": 3100, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:22:33.693484", "step": 3100, "epoch": 2 }, { "type": "loss", "content": 0.004419370554387569, "timestamp": "2025-09-10 02:22:33.695734", "step": 3101, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:22:33.726823", "step": 3101, "epoch": 2 }, { "type": "loss", "content": 0.0012314915657043457, "timestamp": "2025-09-10 02:22:33.731053", "step": 3102, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:22:33.765637", "step": 3102, "epoch": 2 }, { "type": "loss", "content": 0.007404958363622427, "timestamp": "2025-09-10 02:22:33.777847", "step": 3103, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:22:33.809233", "step": 3103, "epoch": 2 }, { "type": "loss", "content": 0.002475725719705224, "timestamp": "2025-09-10 02:22:33.836868", "step": 3104, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:22:33.869402", "step": 3104, "epoch": 2 }, { "type": "loss", "content": 0.024621224030852318, "timestamp": "2025-09-10 02:22:33.876148", "step": 3105, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:22:33.907176", "step": 3105, "epoch": 2 }, { "type": "loss", "content": 0.006465516518801451, "timestamp": "2025-09-10 02:22:33.913998", "step": 3106, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:22:33.947508", "step": 3106, "epoch": 2 }, { "type": "loss", "content": 0.0004534423351287842, "timestamp": "2025-09-10 02:22:33.960891", "step": 3107, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:22:33.993962", "step": 3107, "epoch": 2 }, { "type": "loss", "content": 0.0025858450680971146, "timestamp": "2025-09-10 02:22:34.022012", "step": 3108, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:22:34.053224", "step": 3108, "epoch": 2 }, { "type": "loss", "content": 0.0024823250714689493, "timestamp": "2025-09-10 02:22:34.057775", "step": 3109, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:22:34.089247", "step": 3109, "epoch": 2 }, { "type": "loss", "content": 0.004186380188912153, "timestamp": "2025-09-10 02:22:34.099417", "step": 3110, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:22:34.131364", "step": 3110, "epoch": 2 }, { "type": "loss", "content": 0.0007520094513893127, "timestamp": "2025-09-10 02:22:34.141494", "step": 3111, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:22:34.172972", "step": 3111, "epoch": 2 }, { "type": "loss", "content": 0.00030438616522587836, "timestamp": "2025-09-10 02:22:34.201555", "step": 3112, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:22:34.232666", "step": 3112, "epoch": 2 }, { "type": "loss", "content": 0.00505801709368825, "timestamp": "2025-09-10 02:22:34.234997", "step": 3113, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:22:34.266322", "step": 3113, "epoch": 2 }, { "type": "loss", "content": 0.0030636286828666925, "timestamp": "2025-09-10 02:22:34.273511", "step": 3114, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 480 ], "flops": 14238374516352 }, "timestamp": "2025-09-10 02:22:34.314929", "step": 3114, "epoch": 2 }, { "type": "loss", "content": 0.039970513433218, "timestamp": "2025-09-10 02:22:34.332216", "step": 3115, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:22:34.364789", "step": 3115, "epoch": 2 }, { "type": "loss", "content": 0.012349791824817657, "timestamp": "2025-09-10 02:22:34.397696", "step": 3116, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:22:34.428527", "step": 3116, "epoch": 2 }, { "type": "loss", "content": 0.0034343355800956488, "timestamp": "2025-09-10 02:22:34.433349", "step": 3117, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:22:34.463675", "step": 3117, "epoch": 2 }, { "type": "loss", "content": 0.011677572503685951, "timestamp": "2025-09-10 02:22:34.470737", "step": 3118, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:22:34.502989", "step": 3118, "epoch": 2 }, { "type": "loss", "content": 0.0023600461427122355, "timestamp": "2025-09-10 02:22:34.515486", "step": 3119, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-10 02:22:34.550914", "step": 3119, "epoch": 2 }, { "type": "loss", "content": 0.0012422216823324561, "timestamp": "2025-09-10 02:22:34.585815", "step": 3120, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:22:34.616858", "step": 3120, "epoch": 2 }, { "type": "loss", "content": 0.005536832381039858, "timestamp": "2025-09-10 02:22:34.621216", "step": 3121, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:22:34.651904", "step": 3121, "epoch": 2 }, { "type": "loss", "content": 0.003003006335347891, "timestamp": "2025-09-10 02:22:34.658901", "step": 3122, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:22:34.689307", "step": 3122, "epoch": 2 }, { "type": "loss", "content": 0.0019031567499041557, "timestamp": "2025-09-10 02:22:34.696479", "step": 3123, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-10 02:22:34.734404", "step": 3123, "epoch": 2 }, { "type": "loss", "content": 0.0015222270740196109, "timestamp": "2025-09-10 02:22:34.770870", "step": 3124, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:22:34.805056", "step": 3124, "epoch": 2 }, { "type": "loss", "content": 0.003712509525939822, "timestamp": "2025-09-10 02:22:34.809662", "step": 3125, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:22:34.840686", "step": 3125, "epoch": 2 }, { "type": "loss", "content": 0.0011844148393720388, "timestamp": "2025-09-10 02:22:34.843120", "step": 3126, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:22:34.874283", "step": 3126, "epoch": 2 }, { "type": "loss", "content": 0.009678156115114689, "timestamp": "2025-09-10 02:22:34.885863", "step": 3127, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:22:34.917147", "step": 3127, "epoch": 2 }, { "type": "loss", "content": 0.001543865422718227, "timestamp": "2025-09-10 02:22:34.945213", "step": 3128, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:22:34.975336", "step": 3128, "epoch": 2 }, { "type": "loss", "content": 0.0022532050497829914, "timestamp": "2025-09-10 02:22:34.977838", "step": 3129, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:22:35.009158", "step": 3129, "epoch": 2 }, { "type": "loss", "content": 0.0002928555477410555, "timestamp": "2025-09-10 02:22:35.021490", "step": 3130, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:22:35.062819", "step": 3130, "epoch": 2 }, { "type": "loss", "content": 0.0005914249341003597, "timestamp": "2025-09-10 02:22:35.070289", "step": 3131, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:22:35.115911", "step": 3131, "epoch": 2 }, { "type": "loss", "content": 0.0007928982959128916, "timestamp": "2025-09-10 02:22:35.144531", "step": 3132, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:22:35.184890", "step": 3132, "epoch": 2 }, { "type": "loss", "content": 0.0026510064490139484, "timestamp": "2025-09-10 02:22:35.193172", "step": 3133, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:22:35.234127", "step": 3133, "epoch": 2 }, { "type": "loss", "content": 0.004251073580235243, "timestamp": "2025-09-10 02:22:35.241154", "step": 3134, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:22:35.275464", "step": 3134, "epoch": 2 }, { "type": "loss", "content": 0.003887306433171034, "timestamp": "2025-09-10 02:22:35.286274", "step": 3135, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:22:35.319472", "step": 3135, "epoch": 2 }, { "type": "loss", "content": 0.008226651698350906, "timestamp": "2025-09-10 02:22:35.347308", "step": 3136, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:22:35.377446", "step": 3136, "epoch": 2 }, { "type": "loss", "content": 0.013611420057713985, "timestamp": "2025-09-10 02:22:35.381890", "step": 3137, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:22:35.414659", "step": 3137, "epoch": 2 }, { "type": "loss", "content": 0.0004120334633626044, "timestamp": "2025-09-10 02:22:35.421833", "step": 3138, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:22:35.458490", "step": 3138, "epoch": 2 }, { "type": "loss", "content": 0.002287933137267828, "timestamp": "2025-09-10 02:22:35.472204", "step": 3139, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:22:35.506202", "step": 3139, "epoch": 2 }, { "type": "loss", "content": 0.00017298969032708555, "timestamp": "2025-09-10 02:22:35.533829", "step": 3140, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-10 02:22:35.570686", "step": 3140, "epoch": 2 }, { "type": "loss", "content": 0.00024141445464920253, "timestamp": "2025-09-10 02:22:35.586110", "step": 3141, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:22:35.617668", "step": 3141, "epoch": 2 }, { "type": "loss", "content": 0.0008984781452454627, "timestamp": "2025-09-10 02:22:35.629397", "step": 3142, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-10 02:22:35.669036", "step": 3142, "epoch": 2 }, { "type": "loss", "content": 0.010877908207476139, "timestamp": "2025-09-10 02:22:35.684699", "step": 3143, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:22:35.716267", "step": 3143, "epoch": 2 }, { "type": "loss", "content": 0.018387990072369576, "timestamp": "2025-09-10 02:22:35.743904", "step": 3144, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-10 02:22:35.777546", "step": 3144, "epoch": 2 }, { "type": "loss", "content": 0.0019596496131271124, "timestamp": "2025-09-10 02:22:35.790640", "step": 3145, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:22:35.822410", "step": 3145, "epoch": 2 }, { "type": "loss", "content": 0.00041655570385046303, "timestamp": "2025-09-10 02:22:35.829175", "step": 3146, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:22:35.861912", "step": 3146, "epoch": 2 }, { "type": "loss", "content": 0.0006447642226703465, "timestamp": "2025-09-10 02:22:35.868664", "step": 3147, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 528 ], "flops": 15662185694400 }, "timestamp": "2025-09-10 02:22:35.915220", "step": 3147, "epoch": 2 }, { "type": "loss", "content": 0.0039002900011837482, "timestamp": "2025-09-10 02:22:35.955086", "step": 3148, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:22:35.987428", "step": 3148, "epoch": 2 }, { "type": "loss", "content": 0.0022350053768604994, "timestamp": "2025-09-10 02:22:35.992181", "step": 3149, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:22:36.023626", "step": 3149, "epoch": 2 }, { "type": "loss", "content": 0.005409142933785915, "timestamp": "2025-09-10 02:22:36.030476", "step": 3150, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:22:36.062820", "step": 3150, "epoch": 2 }, { "type": "loss", "content": 0.0011333615984767675, "timestamp": "2025-09-10 02:22:36.070256", "step": 3151, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:22:36.100823", "step": 3151, "epoch": 2 }, { "type": "loss", "content": 0.0002690895344130695, "timestamp": "2025-09-10 02:22:36.128651", "step": 3152, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:22:36.161113", "step": 3152, "epoch": 2 }, { "type": "loss", "content": 0.017261261120438576, "timestamp": "2025-09-10 02:22:36.173823", "step": 3153, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:22:36.208430", "step": 3153, "epoch": 2 }, { "type": "loss", "content": 0.0018687748815864325, "timestamp": "2025-09-10 02:22:36.222132", "step": 3154, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:22:36.255541", "step": 3154, "epoch": 2 }, { "type": "loss", "content": 0.0018482712330296636, "timestamp": "2025-09-10 02:22:36.262692", "step": 3155, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:22:36.293994", "step": 3155, "epoch": 2 }, { "type": "loss", "content": 0.0002486660669092089, "timestamp": "2025-09-10 02:22:36.322466", "step": 3156, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:22:36.353796", "step": 3156, "epoch": 2 }, { "type": "loss", "content": 0.0037758280523121357, "timestamp": "2025-09-10 02:22:36.355847", "step": 3157, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 144 ], "flops": 4271696270016 }, "timestamp": "2025-09-10 02:22:36.389754", "step": 3157, "epoch": 2 }, { "type": "loss", "content": 0.0007112511666491628, "timestamp": "2025-09-10 02:22:36.392450", "step": 3158, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:22:36.423967", "step": 3158, "epoch": 2 }, { "type": "loss", "content": 0.000233599086641334, "timestamp": "2025-09-10 02:22:36.436441", "step": 3159, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:22:36.470084", "step": 3159, "epoch": 2 }, { "type": "loss", "content": 0.00017964192375075072, "timestamp": "2025-09-10 02:22:36.493663", "step": 3160, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:22:36.526932", "step": 3160, "epoch": 2 }, { "type": "loss", "content": 0.004801702219992876, "timestamp": "2025-09-10 02:22:36.540081", "step": 3161, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:22:36.584674", "step": 3161, "epoch": 2 }, { "type": "loss", "content": 0.020199043676257133, "timestamp": "2025-09-10 02:22:36.591734", "step": 3162, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:22:36.625173", "step": 3162, "epoch": 2 }, { "type": "loss", "content": 0.0010675977682694793, "timestamp": "2025-09-10 02:22:36.636048", "step": 3163, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:22:36.673457", "step": 3163, "epoch": 2 }, { "type": "loss", "content": 0.0003448014031164348, "timestamp": "2025-09-10 02:22:36.701119", "step": 3164, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:22:36.734990", "step": 3164, "epoch": 2 }, { "type": "loss", "content": 0.00020471213792916387, "timestamp": "2025-09-10 02:22:36.739144", "step": 3165, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:22:36.787514", "step": 3165, "epoch": 2 }, { "type": "loss", "content": 0.0005103556322865188, "timestamp": "2025-09-10 02:22:36.794029", "step": 3166, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:22:36.827768", "step": 3166, "epoch": 2 }, { "type": "loss", "content": 0.002306754468008876, "timestamp": "2025-09-10 02:22:36.830155", "step": 3167, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:22:36.863788", "step": 3167, "epoch": 2 }, { "type": "loss", "content": 0.0009209056152030826, "timestamp": "2025-09-10 02:22:36.891997", "step": 3168, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:22:36.925434", "step": 3168, "epoch": 2 }, { "type": "loss", "content": 0.00017630930233281106, "timestamp": "2025-09-10 02:22:36.938464", "step": 3169, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:22:36.969216", "step": 3169, "epoch": 2 }, { "type": "loss", "content": 0.0032021531369537115, "timestamp": "2025-09-10 02:22:36.976342", "step": 3170, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:22:37.008954", "step": 3170, "epoch": 2 }, { "type": "loss", "content": 0.00039557431591674685, "timestamp": "2025-09-10 02:22:37.013179", "step": 3171, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:22:37.043748", "step": 3171, "epoch": 2 }, { "type": "loss", "content": 0.0010184214916080236, "timestamp": "2025-09-10 02:22:37.074953", "step": 3172, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:22:37.105520", "step": 3172, "epoch": 2 }, { "type": "loss", "content": 0.00031294874497689307, "timestamp": "2025-09-10 02:22:37.113472", "step": 3173, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:22:37.149177", "step": 3173, "epoch": 2 }, { "type": "loss", "content": 0.0008610020158812404, "timestamp": "2025-09-10 02:22:37.162901", "step": 3174, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:22:37.197818", "step": 3174, "epoch": 2 }, { "type": "loss", "content": 0.0015279522631317377, "timestamp": "2025-09-10 02:22:37.211216", "step": 3175, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:22:37.242396", "step": 3175, "epoch": 2 }, { "type": "loss", "content": 0.010398059152066708, "timestamp": "2025-09-10 02:22:37.273537", "step": 3176, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:22:37.304358", "step": 3176, "epoch": 2 }, { "type": "loss", "content": 0.0012553682317957282, "timestamp": "2025-09-10 02:22:37.306608", "step": 3177, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:22:37.337888", "step": 3177, "epoch": 2 }, { "type": "loss", "content": 0.0010120292427018285, "timestamp": "2025-09-10 02:22:37.344761", "step": 3178, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:22:37.375351", "step": 3178, "epoch": 2 }, { "type": "loss", "content": 0.003787730587646365, "timestamp": "2025-09-10 02:22:37.383017", "step": 3179, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 128 ], "flops": 3797092544000 }, "timestamp": "2025-09-10 02:22:37.413647", "step": 3179, "epoch": 2 }, { "type": "loss", "content": 0.013505556620657444, "timestamp": "2025-09-10 02:22:37.437644", "step": 3180, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:22:37.468195", "step": 3180, "epoch": 2 }, { "type": "loss", "content": 0.003877087030559778, "timestamp": "2025-09-10 02:22:37.470460", "step": 3181, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:22:37.501096", "step": 3181, "epoch": 2 }, { "type": "loss", "content": 0.00047005919623188674, "timestamp": "2025-09-10 02:22:37.511281", "step": 3182, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:22:37.541506", "step": 3182, "epoch": 2 }, { "type": "loss", "content": 0.0027071668300777674, "timestamp": "2025-09-10 02:22:37.545685", "step": 3183, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:22:37.581163", "step": 3183, "epoch": 2 }, { "type": "loss", "content": 0.0015775591600686312, "timestamp": "2025-09-10 02:22:37.606764", "step": 3184, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:22:37.637287", "step": 3184, "epoch": 2 }, { "type": "loss", "content": 0.001980002736672759, "timestamp": "2025-09-10 02:22:37.642280", "step": 3185, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:22:37.673723", "step": 3185, "epoch": 2 }, { "type": "loss", "content": 0.0013841536128893495, "timestamp": "2025-09-10 02:22:37.685883", "step": 3186, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:22:37.718254", "step": 3186, "epoch": 2 }, { "type": "loss", "content": 0.0009568403474986553, "timestamp": "2025-09-10 02:22:37.725579", "step": 3187, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:22:37.757250", "step": 3187, "epoch": 2 }, { "type": "loss", "content": 0.0486009381711483, "timestamp": "2025-09-10 02:22:37.785068", "step": 3188, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:22:37.817158", "step": 3188, "epoch": 2 }, { "type": "loss", "content": 0.008468760177493095, "timestamp": "2025-09-10 02:22:37.826681", "step": 3189, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:22:37.858796", "step": 3189, "epoch": 2 }, { "type": "loss", "content": 0.004440871067345142, "timestamp": "2025-09-10 02:22:37.868534", "step": 3190, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:22:37.899339", "step": 3190, "epoch": 2 }, { "type": "loss", "content": 0.039386093616485596, "timestamp": "2025-09-10 02:22:37.906455", "step": 3191, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:22:37.937573", "step": 3191, "epoch": 2 }, { "type": "loss", "content": 0.003340385155752301, "timestamp": "2025-09-10 02:22:37.965889", "step": 3192, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:22:37.997269", "step": 3192, "epoch": 2 }, { "type": "loss", "content": 0.004237064626067877, "timestamp": "2025-09-10 02:22:38.002480", "step": 3193, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:22:38.034109", "step": 3193, "epoch": 2 }, { "type": "loss", "content": 0.010632021352648735, "timestamp": "2025-09-10 02:22:38.045869", "step": 3194, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-10 02:22:38.084477", "step": 3194, "epoch": 2 }, { "type": "loss", "content": 0.0007325800834223628, "timestamp": "2025-09-10 02:22:38.100425", "step": 3195, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:22:38.131176", "step": 3195, "epoch": 2 }, { "type": "loss", "content": 5.4420535889221355e-05, "timestamp": "2025-09-10 02:22:38.156710", "step": 3196, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:22:38.189240", "step": 3196, "epoch": 2 }, { "type": "loss", "content": 0.000553667254280299, "timestamp": "2025-09-10 02:22:38.193935", "step": 3197, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 480 ], "flops": 14238374516352 }, "timestamp": "2025-09-10 02:22:38.235326", "step": 3197, "epoch": 2 }, { "type": "loss", "content": 0.02702312171459198, "timestamp": "2025-09-10 02:22:38.252626", "step": 3198, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:22:38.283741", "step": 3198, "epoch": 2 }, { "type": "loss", "content": 0.000169062870554626, "timestamp": "2025-09-10 02:22:38.290898", "step": 3199, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:22:38.322708", "step": 3199, "epoch": 2 }, { "type": "loss", "content": 0.0018326956778764725, "timestamp": "2025-09-10 02:22:38.346260", "step": 3200, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:22:38.377001", "step": 3200, "epoch": 2 }, { "type": "loss", "content": 0.00025423362967558205, "timestamp": "2025-09-10 02:22:38.382396", "step": 3201, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 432 ], "flops": 12814563338304 }, "timestamp": "2025-09-10 02:22:38.422092", "step": 3201, "epoch": 2 }, { "type": "loss", "content": 0.00021461385767906904, "timestamp": "2025-09-10 02:22:38.438228", "step": 3202, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:22:38.473102", "step": 3202, "epoch": 2 }, { "type": "loss", "content": 0.0002993656671606004, "timestamp": "2025-09-10 02:22:38.486818", "step": 3203, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-10 02:22:38.522162", "step": 3203, "epoch": 2 }, { "type": "loss", "content": 0.0004899102495983243, "timestamp": "2025-09-10 02:22:38.557079", "step": 3204, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:22:38.587494", "step": 3204, "epoch": 2 }, { "type": "loss", "content": 0.0012291016755625606, "timestamp": "2025-09-10 02:22:38.589920", "step": 3205, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:22:38.620619", "step": 3205, "epoch": 2 }, { "type": "loss", "content": 0.01229874137789011, "timestamp": "2025-09-10 02:22:38.624935", "step": 3206, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:22:38.656016", "step": 3206, "epoch": 2 }, { "type": "loss", "content": 0.0004454140434972942, "timestamp": "2025-09-10 02:22:38.666814", "step": 3207, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:22:38.707752", "step": 3207, "epoch": 2 }, { "type": "loss", "content": 0.00010696732351789251, "timestamp": "2025-09-10 02:22:38.735577", "step": 3208, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:22:38.766238", "step": 3208, "epoch": 2 }, { "type": "loss", "content": 0.0006230357685126364, "timestamp": "2025-09-10 02:22:38.774174", "step": 3209, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 480 ], "flops": 14238374516352 }, "timestamp": "2025-09-10 02:22:38.816179", "step": 3209, "epoch": 2 }, { "type": "loss", "content": 0.0018624786753207445, "timestamp": "2025-09-10 02:22:38.833519", "step": 3210, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-10 02:22:38.872502", "step": 3210, "epoch": 2 }, { "type": "loss", "content": 0.0004688594490289688, "timestamp": "2025-09-10 02:22:38.888433", "step": 3211, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:22:38.920488", "step": 3211, "epoch": 2 }, { "type": "loss", "content": 0.0005589794600382447, "timestamp": "2025-09-10 02:22:38.947973", "step": 3212, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:22:38.980672", "step": 3212, "epoch": 2 }, { "type": "loss", "content": 0.001154548255726695, "timestamp": "2025-09-10 02:22:38.988108", "step": 3213, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:22:39.018998", "step": 3213, "epoch": 2 }, { "type": "loss", "content": 0.023087533190846443, "timestamp": "2025-09-10 02:22:39.030941", "step": 3214, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:22:39.064641", "step": 3214, "epoch": 2 }, { "type": "loss", "content": 0.000464627897599712, "timestamp": "2025-09-10 02:22:39.071635", "step": 3215, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:22:39.104297", "step": 3215, "epoch": 2 }, { "type": "loss", "content": 0.00038381904596462846, "timestamp": "2025-09-10 02:22:39.132049", "step": 3216, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-10 02:22:39.165104", "step": 3216, "epoch": 2 }, { "type": "loss", "content": 0.001965318340808153, "timestamp": "2025-09-10 02:22:39.178420", "step": 3217, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:22:39.211715", "step": 3217, "epoch": 2 }, { "type": "loss", "content": 0.0007221942069008946, "timestamp": "2025-09-10 02:22:39.225098", "step": 3218, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:22:39.255813", "step": 3218, "epoch": 2 }, { "type": "loss", "content": 0.0005813446477986872, "timestamp": "2025-09-10 02:22:39.262704", "step": 3219, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:22:39.293375", "step": 3219, "epoch": 2 }, { "type": "loss", "content": 0.0018721247324720025, "timestamp": "2025-09-10 02:22:39.318543", "step": 3220, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:22:39.349428", "step": 3220, "epoch": 2 }, { "type": "loss", "content": 0.0010139280930161476, "timestamp": "2025-09-10 02:22:39.353948", "step": 3221, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:22:39.384772", "step": 3221, "epoch": 2 }, { "type": "loss", "content": 0.0010682783322408795, "timestamp": "2025-09-10 02:22:39.395352", "step": 3222, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:22:39.427058", "step": 3222, "epoch": 2 }, { "type": "loss", "content": 0.014338502660393715, "timestamp": "2025-09-10 02:22:39.433513", "step": 3223, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:22:39.465088", "step": 3223, "epoch": 2 }, { "type": "loss", "content": 0.01097325049340725, "timestamp": "2025-09-10 02:22:39.493428", "step": 3224, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:22:39.525266", "step": 3224, "epoch": 2 }, { "type": "loss", "content": 0.00029258467839099467, "timestamp": "2025-09-10 02:22:39.529639", "step": 3225, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:22:39.561053", "step": 3225, "epoch": 2 }, { "type": "loss", "content": 0.00042427852167747915, "timestamp": "2025-09-10 02:22:39.567975", "step": 3226, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:22:39.600915", "step": 3226, "epoch": 2 }, { "type": "loss", "content": 0.0023932938929647207, "timestamp": "2025-09-10 02:22:39.605326", "step": 3227, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:22:39.637730", "step": 3227, "epoch": 2 }, { "type": "loss", "content": 0.0120490537956357, "timestamp": "2025-09-10 02:22:39.666437", "step": 3228, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:22:39.698073", "step": 3228, "epoch": 2 }, { "type": "loss", "content": 0.0008032119949348271, "timestamp": "2025-09-10 02:22:39.700494", "step": 3229, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:22:39.732562", "step": 3229, "epoch": 2 }, { "type": "loss", "content": 0.0006666731787845492, "timestamp": "2025-09-10 02:22:39.739746", "step": 3230, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:22:39.774952", "step": 3230, "epoch": 2 }, { "type": "loss", "content": 0.01671520434319973, "timestamp": "2025-09-10 02:22:39.788296", "step": 3231, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:22:39.839699", "step": 3231, "epoch": 2 }, { "type": "loss", "content": 0.0014305815566331148, "timestamp": "2025-09-10 02:22:39.867652", "step": 3232, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:22:39.899367", "step": 3232, "epoch": 2 }, { "type": "loss", "content": 0.0004569535667542368, "timestamp": "2025-09-10 02:22:39.904099", "step": 3233, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:22:39.934344", "step": 3233, "epoch": 2 }, { "type": "loss", "content": 0.00045302906073629856, "timestamp": "2025-09-10 02:22:39.938752", "step": 3234, "epoch": 2 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 736 ], "batch_size": 8, "flops": 14554433988352 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 480 ], "batch_size": 8, "flops": 9492022189824 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 544 ], "batch_size": 8, "flops": 10757625139456 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 624 ], "batch_size": 8, "flops": 12339628826496 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 576 ], "batch_size": 8, "flops": 11390426614272 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 432 ], "batch_size": 8, "flops": 8542819977600 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 432 ], "batch_size": 8, "flops": 8542819977600 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 128 ], "batch_size": 8, "flops": 2531205966848 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 544 ], "batch_size": 8, "flops": 10757625139456 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 656 ], "batch_size": 8, "flops": 12972430301312 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 128 ], "batch_size": 8, "flops": 2531205966848 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 464 ], "batch_size": 8, "flops": 9175621452416 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 560 ], "batch_size": 8, "flops": 11074025876864 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 576 ], "batch_size": 8, "flops": 11390426614272 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 1168 ], "batch_size": 8, "flops": 23097253898368 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 640 ], "batch_size": 8, "flops": 12656029563904 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 6, 208 ], "batch_size": 8, "flops": 4113209653888 } ], "timestamp": "2025-09-10 02:22:49.945100", "step": 3234, "epoch": 2 }, { "type": "pplx", "content": 23502743.08364132, "timestamp": "2025-09-10 02:22:49.948126", "step": 3234, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:22:49.978817", "step": 3234, "epoch": 2 }, { "type": "loss", "content": 0.001856299233622849, "timestamp": "2025-09-10 02:22:49.982354", "step": 3235, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:22:50.013341", "step": 3235, "epoch": 2 }, { "type": "loss", "content": 0.000491947284899652, "timestamp": "2025-09-10 02:22:50.043579", "step": 3236, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:22:50.073820", "step": 3236, "epoch": 2 }, { "type": "loss", "content": 0.0004294110112823546, "timestamp": "2025-09-10 02:22:50.078443", "step": 3237, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:22:50.108498", "step": 3237, "epoch": 2 }, { "type": "loss", "content": 0.0006372739444486797, "timestamp": "2025-09-10 02:22:50.121008", "step": 3238, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:22:50.152654", "step": 3238, "epoch": 2 }, { "type": "loss", "content": 0.0037969518452882767, "timestamp": "2025-09-10 02:22:50.163174", "step": 3239, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:22:50.193750", "step": 3239, "epoch": 2 }, { "type": "loss", "content": 0.0004036373575218022, "timestamp": "2025-09-10 02:22:50.224719", "step": 3240, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:22:50.254784", "step": 3240, "epoch": 2 }, { "type": "loss", "content": 0.0003846238541882485, "timestamp": "2025-09-10 02:22:50.259442", "step": 3241, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:22:50.289815", "step": 3241, "epoch": 2 }, { "type": "loss", "content": 0.02323022112250328, "timestamp": "2025-09-10 02:22:50.300221", "step": 3242, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:22:50.331983", "step": 3242, "epoch": 2 }, { "type": "loss", "content": 0.0003217768098693341, "timestamp": "2025-09-10 02:22:50.339433", "step": 3243, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 784 ], "flops": 23255845310656 }, "timestamp": "2025-09-10 02:22:50.404351", "step": 3243, "epoch": 2 }, { "type": "loss", "content": 0.0018644100055098534, "timestamp": "2025-09-10 02:22:50.452450", "step": 3244, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:22:50.483616", "step": 3244, "epoch": 2 }, { "type": "loss", "content": 0.001525462488643825, "timestamp": "2025-09-10 02:22:50.493622", "step": 3245, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:22:50.528687", "step": 3245, "epoch": 2 }, { "type": "loss", "content": 0.0013417869340628386, "timestamp": "2025-09-10 02:22:50.532530", "step": 3246, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:22:50.563727", "step": 3246, "epoch": 2 }, { "type": "loss", "content": 0.0008608666248619556, "timestamp": "2025-09-10 02:22:50.574306", "step": 3247, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:22:50.605993", "step": 3247, "epoch": 2 }, { "type": "loss", "content": 0.00041623544530011714, "timestamp": "2025-09-10 02:22:50.636683", "step": 3248, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 432 ], "flops": 12814563338304 }, "timestamp": "2025-09-10 02:22:50.672229", "step": 3248, "epoch": 2 }, { "type": "loss", "content": 0.0026598642580211163, "timestamp": "2025-09-10 02:22:50.688083", "step": 3249, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:22:50.722087", "step": 3249, "epoch": 2 }, { "type": "loss", "content": 0.0033846586011350155, "timestamp": "2025-09-10 02:22:50.724450", "step": 3250, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:22:50.760297", "step": 3250, "epoch": 2 }, { "type": "loss", "content": 0.00024376294459216297, "timestamp": "2025-09-10 02:22:50.766874", "step": 3251, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:22:50.797846", "step": 3251, "epoch": 2 }, { "type": "loss", "content": 0.0008604326867498457, "timestamp": "2025-09-10 02:22:50.826404", "step": 3252, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:22:50.859109", "step": 3252, "epoch": 2 }, { "type": "loss", "content": 0.0002787252014968544, "timestamp": "2025-09-10 02:22:50.861302", "step": 3253, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:22:50.891014", "step": 3253, "epoch": 2 }, { "type": "loss", "content": 0.0013136464403942227, "timestamp": "2025-09-10 02:22:50.896621", "step": 3254, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:22:50.929511", "step": 3254, "epoch": 2 }, { "type": "loss", "content": 0.0002793243620544672, "timestamp": "2025-09-10 02:22:50.935575", "step": 3255, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:22:50.968465", "step": 3255, "epoch": 2 }, { "type": "loss", "content": 0.0005823360406793654, "timestamp": "2025-09-10 02:22:50.996851", "step": 3256, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:22:51.028652", "step": 3256, "epoch": 2 }, { "type": "loss", "content": 0.009857832454144955, "timestamp": "2025-09-10 02:22:51.033616", "step": 3257, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:22:51.069361", "step": 3257, "epoch": 2 }, { "type": "loss", "content": 0.0003967168158851564, "timestamp": "2025-09-10 02:22:51.073645", "step": 3258, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:22:51.106717", "step": 3258, "epoch": 2 }, { "type": "loss", "content": 0.0002505451557226479, "timestamp": "2025-09-10 02:22:51.114402", "step": 3259, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:22:51.148211", "step": 3259, "epoch": 2 }, { "type": "loss", "content": 0.0007157556829042733, "timestamp": "2025-09-10 02:22:51.175925", "step": 3260, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-10 02:22:51.216586", "step": 3260, "epoch": 2 }, { "type": "loss", "content": 0.00013806803326588124, "timestamp": "2025-09-10 02:22:51.231738", "step": 3261, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:22:51.268896", "step": 3261, "epoch": 2 }, { "type": "loss", "content": 0.00021779598318971694, "timestamp": "2025-09-10 02:22:51.278799", "step": 3262, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:22:51.316727", "step": 3262, "epoch": 2 }, { "type": "loss", "content": 0.008616751991212368, "timestamp": "2025-09-10 02:22:51.320862", "step": 3263, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 496 ], "flops": 14712978242368 }, "timestamp": "2025-09-10 02:22:51.367488", "step": 3263, "epoch": 2 }, { "type": "loss", "content": 0.0011451850878074765, "timestamp": "2025-09-10 02:22:51.406167", "step": 3264, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:22:51.442444", "step": 3264, "epoch": 2 }, { "type": "loss", "content": 0.0010292161023244262, "timestamp": "2025-09-10 02:22:51.455509", "step": 3265, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:22:51.502272", "step": 3265, "epoch": 2 }, { "type": "loss", "content": 0.004708148539066315, "timestamp": "2025-09-10 02:22:51.506216", "step": 3266, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:22:51.540036", "step": 3266, "epoch": 2 }, { "type": "loss", "content": 0.0003200080245733261, "timestamp": "2025-09-10 02:22:51.547440", "step": 3267, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:22:51.597175", "step": 3267, "epoch": 2 }, { "type": "loss", "content": 0.0003564673534128815, "timestamp": "2025-09-10 02:22:51.628072", "step": 3268, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:22:51.660155", "step": 3268, "epoch": 2 }, { "type": "loss", "content": 0.001860837102867663, "timestamp": "2025-09-10 02:22:51.664785", "step": 3269, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:22:51.698093", "step": 3269, "epoch": 2 }, { "type": "loss", "content": 0.0005392608582042158, "timestamp": "2025-09-10 02:22:51.703230", "step": 3270, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:22:51.742970", "step": 3270, "epoch": 2 }, { "type": "loss", "content": 0.0032175458036363125, "timestamp": "2025-09-10 02:22:51.750416", "step": 3271, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:22:51.783389", "step": 3271, "epoch": 2 }, { "type": "loss", "content": 0.009787830524146557, "timestamp": "2025-09-10 02:22:51.811842", "step": 3272, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:22:51.842319", "step": 3272, "epoch": 2 }, { "type": "loss", "content": 0.0008516389061696827, "timestamp": "2025-09-10 02:22:51.844388", "step": 3273, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:22:51.878358", "step": 3273, "epoch": 2 }, { "type": "loss", "content": 0.0036891864147037268, "timestamp": "2025-09-10 02:22:51.882661", "step": 3274, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:22:51.916027", "step": 3274, "epoch": 2 }, { "type": "loss", "content": 0.00042585088522173464, "timestamp": "2025-09-10 02:22:51.924005", "step": 3275, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:22:51.954832", "step": 3275, "epoch": 2 }, { "type": "loss", "content": 0.0004672827199101448, "timestamp": "2025-09-10 02:22:51.983679", "step": 3276, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:22:52.015837", "step": 3276, "epoch": 2 }, { "type": "loss", "content": 0.00011190387886017561, "timestamp": "2025-09-10 02:22:52.020512", "step": 3277, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:22:52.054019", "step": 3277, "epoch": 2 }, { "type": "loss", "content": 0.0003633495362009853, "timestamp": "2025-09-10 02:22:52.060849", "step": 3278, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:22:52.094823", "step": 3278, "epoch": 2 }, { "type": "loss", "content": 0.0030928533524274826, "timestamp": "2025-09-10 02:22:52.107417", "step": 3279, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:22:52.140009", "step": 3279, "epoch": 2 }, { "type": "loss", "content": 0.0003447837952990085, "timestamp": "2025-09-10 02:22:52.172284", "step": 3280, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:22:52.207809", "step": 3280, "epoch": 2 }, { "type": "loss", "content": 0.0003286560531705618, "timestamp": "2025-09-10 02:22:52.210106", "step": 3281, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:22:52.240739", "step": 3281, "epoch": 2 }, { "type": "loss", "content": 0.0026785004884004593, "timestamp": "2025-09-10 02:22:52.251175", "step": 3282, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-10 02:22:52.287644", "step": 3282, "epoch": 2 }, { "type": "loss", "content": 0.004041456617414951, "timestamp": "2025-09-10 02:22:52.301581", "step": 3283, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:22:52.333684", "step": 3283, "epoch": 2 }, { "type": "loss", "content": 0.004617027007043362, "timestamp": "2025-09-10 02:22:52.361945", "step": 3284, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:22:52.393199", "step": 3284, "epoch": 2 }, { "type": "loss", "content": 0.0017622795421630144, "timestamp": "2025-09-10 02:22:52.398639", "step": 3285, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:22:52.429068", "step": 3285, "epoch": 2 }, { "type": "loss", "content": 0.0006238414789550006, "timestamp": "2025-09-10 02:22:52.439567", "step": 3286, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:22:52.470208", "step": 3286, "epoch": 2 }, { "type": "loss", "content": 0.005539960693567991, "timestamp": "2025-09-10 02:22:52.474511", "step": 3287, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:22:52.504593", "step": 3287, "epoch": 2 }, { "type": "loss", "content": 0.0032464484684169292, "timestamp": "2025-09-10 02:22:52.533337", "step": 3288, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:22:52.563908", "step": 3288, "epoch": 2 }, { "type": "loss", "content": 0.0018969980301335454, "timestamp": "2025-09-10 02:22:52.572641", "step": 3289, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:22:52.603471", "step": 3289, "epoch": 2 }, { "type": "loss", "content": 0.0011059996904805303, "timestamp": "2025-09-10 02:22:52.607788", "step": 3290, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:22:52.639390", "step": 3290, "epoch": 2 }, { "type": "loss", "content": 0.009358406998217106, "timestamp": "2025-09-10 02:22:52.651643", "step": 3291, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:22:52.683118", "step": 3291, "epoch": 2 }, { "type": "loss", "content": 0.0013368077343329787, "timestamp": "2025-09-10 02:22:52.711542", "step": 3292, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:22:52.742817", "step": 3292, "epoch": 2 }, { "type": "loss", "content": 0.013290916569530964, "timestamp": "2025-09-10 02:22:52.744954", "step": 3293, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:22:52.776523", "step": 3293, "epoch": 2 }, { "type": "loss", "content": 0.004001455847173929, "timestamp": "2025-09-10 02:22:52.787595", "step": 3294, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:22:52.818335", "step": 3294, "epoch": 2 }, { "type": "loss", "content": 0.00019497517496347427, "timestamp": "2025-09-10 02:22:52.822799", "step": 3295, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:22:52.855538", "step": 3295, "epoch": 2 }, { "type": "loss", "content": 0.0005138739361427724, "timestamp": "2025-09-10 02:22:52.883393", "step": 3296, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:22:52.914958", "step": 3296, "epoch": 2 }, { "type": "loss", "content": 0.008893569000065327, "timestamp": "2025-09-10 02:22:52.920142", "step": 3297, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-10 02:22:52.959182", "step": 3297, "epoch": 2 }, { "type": "loss", "content": 0.0008259877795353532, "timestamp": "2025-09-10 02:22:52.975036", "step": 3298, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:22:53.005523", "step": 3298, "epoch": 2 }, { "type": "loss", "content": 0.00016621073882561177, "timestamp": "2025-09-10 02:22:53.013103", "step": 3299, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:22:53.046542", "step": 3299, "epoch": 2 }, { "type": "loss", "content": 0.002620183164253831, "timestamp": "2025-09-10 02:22:53.075158", "step": 3300, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:22:53.105095", "step": 3300, "epoch": 2 }, { "type": "loss", "content": 0.0029605585150420666, "timestamp": "2025-09-10 02:22:53.107245", "step": 3301, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:22:53.137003", "step": 3301, "epoch": 2 }, { "type": "loss", "content": 0.0031393535900861025, "timestamp": "2025-09-10 02:22:53.143954", "step": 3302, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:22:53.174948", "step": 3302, "epoch": 2 }, { "type": "loss", "content": 0.002589694457128644, "timestamp": "2025-09-10 02:22:53.182628", "step": 3303, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:22:53.212926", "step": 3303, "epoch": 2 }, { "type": "loss", "content": 0.0028403718024492264, "timestamp": "2025-09-10 02:22:53.241438", "step": 3304, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:22:53.271110", "step": 3304, "epoch": 2 }, { "type": "loss", "content": 0.000695100927259773, "timestamp": "2025-09-10 02:22:53.275923", "step": 3305, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:22:53.306170", "step": 3305, "epoch": 2 }, { "type": "loss", "content": 0.012687050737440586, "timestamp": "2025-09-10 02:22:53.313990", "step": 3306, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:22:53.343830", "step": 3306, "epoch": 2 }, { "type": "loss", "content": 0.03439050540328026, "timestamp": "2025-09-10 02:22:53.350885", "step": 3307, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:22:53.381182", "step": 3307, "epoch": 2 }, { "type": "loss", "content": 0.02955719642341137, "timestamp": "2025-09-10 02:22:53.409822", "step": 3308, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:22:53.441083", "step": 3308, "epoch": 2 }, { "type": "loss", "content": 0.0008426779531873763, "timestamp": "2025-09-10 02:22:53.449713", "step": 3309, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:22:53.480771", "step": 3309, "epoch": 2 }, { "type": "loss", "content": 0.014351406134665012, "timestamp": "2025-09-10 02:22:53.488153", "step": 3310, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:22:53.519052", "step": 3310, "epoch": 2 }, { "type": "loss", "content": 0.0006407542387023568, "timestamp": "2025-09-10 02:22:53.529339", "step": 3311, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:22:53.562755", "step": 3311, "epoch": 2 }, { "type": "loss", "content": 0.013966037891805172, "timestamp": "2025-09-10 02:22:53.597025", "step": 3312, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:22:53.627652", "step": 3312, "epoch": 2 }, { "type": "loss", "content": 0.0011689442908391356, "timestamp": "2025-09-10 02:22:53.635469", "step": 3313, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:22:53.665966", "step": 3313, "epoch": 2 }, { "type": "loss", "content": 0.0012120773317292333, "timestamp": "2025-09-10 02:22:53.670069", "step": 3314, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:22:53.701243", "step": 3314, "epoch": 2 }, { "type": "loss", "content": 0.011072760447859764, "timestamp": "2025-09-10 02:22:53.705361", "step": 3315, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:22:53.735480", "step": 3315, "epoch": 2 }, { "type": "loss", "content": 0.01113554835319519, "timestamp": "2025-09-10 02:22:53.759070", "step": 3316, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:22:53.789020", "step": 3316, "epoch": 2 }, { "type": "loss", "content": 0.0005071196937933564, "timestamp": "2025-09-10 02:22:53.791168", "step": 3317, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:22:53.821706", "step": 3317, "epoch": 2 }, { "type": "loss", "content": 0.015270305797457695, "timestamp": "2025-09-10 02:22:53.829189", "step": 3318, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-10 02:22:53.863710", "step": 3318, "epoch": 2 }, { "type": "loss", "content": 0.003202088875696063, "timestamp": "2025-09-10 02:22:53.877698", "step": 3319, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:22:53.908537", "step": 3319, "epoch": 2 }, { "type": "loss", "content": 0.06514207273721695, "timestamp": "2025-09-10 02:22:53.941350", "step": 3320, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-10 02:22:53.973867", "step": 3320, "epoch": 2 }, { "type": "loss", "content": 0.00015472178347408772, "timestamp": "2025-09-10 02:22:53.987005", "step": 3321, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:22:54.017702", "step": 3321, "epoch": 2 }, { "type": "loss", "content": 0.023015392944216728, "timestamp": "2025-09-10 02:22:54.024625", "step": 3322, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:22:54.054972", "step": 3322, "epoch": 2 }, { "type": "loss", "content": 0.020949339494109154, "timestamp": "2025-09-10 02:22:54.059051", "step": 3323, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:22:54.089059", "step": 3323, "epoch": 2 }, { "type": "loss", "content": 0.0001148775772890076, "timestamp": "2025-09-10 02:22:54.116873", "step": 3324, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:22:54.148560", "step": 3324, "epoch": 2 }, { "type": "loss", "content": 0.0014125898014754057, "timestamp": "2025-09-10 02:22:54.159196", "step": 3325, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 576 ], "flops": 17085996872448 }, "timestamp": "2025-09-10 02:22:54.210053", "step": 3325, "epoch": 2 }, { "type": "loss", "content": 0.0030126813799142838, "timestamp": "2025-09-10 02:22:54.229548", "step": 3326, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:22:54.271396", "step": 3326, "epoch": 2 }, { "type": "loss", "content": 0.005848866421729326, "timestamp": "2025-09-10 02:22:54.281730", "step": 3327, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:22:54.316933", "step": 3327, "epoch": 2 }, { "type": "loss", "content": 0.0003858902200590819, "timestamp": "2025-09-10 02:22:54.343095", "step": 3328, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:22:54.377475", "step": 3328, "epoch": 2 }, { "type": "loss", "content": 0.0009587566019035876, "timestamp": "2025-09-10 02:22:54.382978", "step": 3329, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:22:54.422472", "step": 3329, "epoch": 2 }, { "type": "loss", "content": 0.0006397636607289314, "timestamp": "2025-09-10 02:22:54.436228", "step": 3330, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 144 ], "flops": 4271696270016 }, "timestamp": "2025-09-10 02:22:54.466890", "step": 3330, "epoch": 2 }, { "type": "loss", "content": 0.0004409528919495642, "timestamp": "2025-09-10 02:22:54.470871", "step": 3331, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:22:54.502681", "step": 3331, "epoch": 2 }, { "type": "loss", "content": 0.06097668781876564, "timestamp": "2025-09-10 02:22:54.527627", "step": 3332, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:22:54.559173", "step": 3332, "epoch": 2 }, { "type": "loss", "content": 0.0003068390360567719, "timestamp": "2025-09-10 02:22:54.564065", "step": 3333, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:22:54.599518", "step": 3333, "epoch": 2 }, { "type": "loss", "content": 0.04409003257751465, "timestamp": "2025-09-10 02:22:54.603863", "step": 3334, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 144 ], "flops": 4271696270016 }, "timestamp": "2025-09-10 02:22:54.642867", "step": 3334, "epoch": 2 }, { "type": "loss", "content": 0.008692757226526737, "timestamp": "2025-09-10 02:22:54.646571", "step": 3335, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:22:54.678746", "step": 3335, "epoch": 2 }, { "type": "loss", "content": 0.031811974942684174, "timestamp": "2025-09-10 02:22:54.704331", "step": 3336, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:22:54.736636", "step": 3336, "epoch": 2 }, { "type": "loss", "content": 0.01294754259288311, "timestamp": "2025-09-10 02:22:54.744616", "step": 3337, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:22:54.776905", "step": 3337, "epoch": 2 }, { "type": "loss", "content": 0.00024945108452811837, "timestamp": "2025-09-10 02:22:54.784345", "step": 3338, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:22:54.819269", "step": 3338, "epoch": 2 }, { "type": "loss", "content": 0.003305058693513274, "timestamp": "2025-09-10 02:22:54.829892", "step": 3339, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:22:54.860214", "step": 3339, "epoch": 2 }, { "type": "loss", "content": 0.0017841076478362083, "timestamp": "2025-09-10 02:22:54.885431", "step": 3340, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:22:54.917395", "step": 3340, "epoch": 2 }, { "type": "loss", "content": 0.000721210555639118, "timestamp": "2025-09-10 02:22:54.922181", "step": 3341, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 144 ], "flops": 4271696270016 }, "timestamp": "2025-09-10 02:22:54.954585", "step": 3341, "epoch": 2 }, { "type": "loss", "content": 0.0028600546065717936, "timestamp": "2025-09-10 02:22:54.956462", "step": 3342, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:22:54.986851", "step": 3342, "epoch": 2 }, { "type": "loss", "content": 0.04634520411491394, "timestamp": "2025-09-10 02:22:54.990315", "step": 3343, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:22:55.020295", "step": 3343, "epoch": 2 }, { "type": "loss", "content": 0.012477157637476921, "timestamp": "2025-09-10 02:22:55.048779", "step": 3344, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:22:55.080169", "step": 3344, "epoch": 2 }, { "type": "loss", "content": 0.0029694880358874798, "timestamp": "2025-09-10 02:22:55.092968", "step": 3345, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:22:55.123580", "step": 3345, "epoch": 2 }, { "type": "loss", "content": 0.0003129235119558871, "timestamp": "2025-09-10 02:22:55.127885", "step": 3346, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:22:55.158608", "step": 3346, "epoch": 2 }, { "type": "loss", "content": 0.009798407554626465, "timestamp": "2025-09-10 02:22:55.170856", "step": 3347, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:22:55.201318", "step": 3347, "epoch": 2 }, { "type": "loss", "content": 0.0179388877004385, "timestamp": "2025-09-10 02:22:55.228995", "step": 3348, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:22:55.259186", "step": 3348, "epoch": 2 }, { "type": "loss", "content": 0.008463481441140175, "timestamp": "2025-09-10 02:22:55.264411", "step": 3349, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:22:55.295248", "step": 3349, "epoch": 2 }, { "type": "loss", "content": 0.0007734844111837447, "timestamp": "2025-09-10 02:22:55.302578", "step": 3350, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:22:55.333607", "step": 3350, "epoch": 2 }, { "type": "loss", "content": 0.07793903350830078, "timestamp": "2025-09-10 02:22:55.337998", "step": 3351, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:22:55.367868", "step": 3351, "epoch": 2 }, { "type": "loss", "content": 0.00045389196020551026, "timestamp": "2025-09-10 02:22:55.396596", "step": 3352, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:22:55.426948", "step": 3352, "epoch": 2 }, { "type": "loss", "content": 0.01835579052567482, "timestamp": "2025-09-10 02:22:55.432266", "step": 3353, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:22:55.462789", "step": 3353, "epoch": 2 }, { "type": "loss", "content": 0.0028381929732859135, "timestamp": "2025-09-10 02:22:55.474970", "step": 3354, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:22:55.505419", "step": 3354, "epoch": 2 }, { "type": "loss", "content": 0.001101338886655867, "timestamp": "2025-09-10 02:22:55.512718", "step": 3355, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:22:55.542884", "step": 3355, "epoch": 2 }, { "type": "loss", "content": 0.006561249028891325, "timestamp": "2025-09-10 02:22:55.571365", "step": 3356, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:22:55.601655", "step": 3356, "epoch": 2 }, { "type": "loss", "content": 0.0058148703537881374, "timestamp": "2025-09-10 02:22:55.612048", "step": 3357, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:22:55.642061", "step": 3357, "epoch": 2 }, { "type": "loss", "content": 0.0029688451904803514, "timestamp": "2025-09-10 02:22:55.644840", "step": 3358, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:22:55.675408", "step": 3358, "epoch": 2 }, { "type": "loss", "content": 0.01831236109137535, "timestamp": "2025-09-10 02:22:55.687572", "step": 3359, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:22:55.717698", "step": 3359, "epoch": 2 }, { "type": "loss", "content": 0.040482617914676666, "timestamp": "2025-09-10 02:22:55.742493", "step": 3360, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:22:55.774544", "step": 3360, "epoch": 2 }, { "type": "loss", "content": 0.021457521244883537, "timestamp": "2025-09-10 02:22:55.782464", "step": 3361, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:22:55.813342", "step": 3361, "epoch": 2 }, { "type": "loss", "content": 0.000866633839905262, "timestamp": "2025-09-10 02:22:55.823534", "step": 3362, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:22:55.853993", "step": 3362, "epoch": 2 }, { "type": "loss", "content": 0.011205156333744526, "timestamp": "2025-09-10 02:22:55.861347", "step": 3363, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:22:55.891354", "step": 3363, "epoch": 2 }, { "type": "loss", "content": 0.0007767033530399203, "timestamp": "2025-09-10 02:22:55.916428", "step": 3364, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:22:55.946368", "step": 3364, "epoch": 2 }, { "type": "loss", "content": 0.002113510388880968, "timestamp": "2025-09-10 02:22:55.951161", "step": 3365, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:22:55.983402", "step": 3365, "epoch": 2 }, { "type": "loss", "content": 0.01282955426722765, "timestamp": "2025-09-10 02:22:55.990965", "step": 3366, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:22:56.022097", "step": 3366, "epoch": 2 }, { "type": "loss", "content": 0.011836833320558071, "timestamp": "2025-09-10 02:22:56.028672", "step": 3367, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:22:56.059408", "step": 3367, "epoch": 2 }, { "type": "loss", "content": 0.010014675557613373, "timestamp": "2025-09-10 02:22:56.090323", "step": 3368, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:22:56.119616", "step": 3368, "epoch": 2 }, { "type": "loss", "content": 0.004872338380664587, "timestamp": "2025-09-10 02:22:56.121441", "step": 3369, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:22:56.152678", "step": 3369, "epoch": 2 }, { "type": "loss", "content": 0.007102675270289183, "timestamp": "2025-09-10 02:22:56.156926", "step": 3370, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:22:56.187059", "step": 3370, "epoch": 2 }, { "type": "loss", "content": 0.0036141786258667707, "timestamp": "2025-09-10 02:22:56.191742", "step": 3371, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:22:56.222806", "step": 3371, "epoch": 2 }, { "type": "loss", "content": 0.0022325122263282537, "timestamp": "2025-09-10 02:22:56.254610", "step": 3372, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:22:56.286385", "step": 3372, "epoch": 2 }, { "type": "loss", "content": 0.027134040370583534, "timestamp": "2025-09-10 02:22:56.291972", "step": 3373, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:22:56.322260", "step": 3373, "epoch": 2 }, { "type": "loss", "content": 0.011692258529365063, "timestamp": "2025-09-10 02:22:56.329292", "step": 3374, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:22:56.360309", "step": 3374, "epoch": 2 }, { "type": "loss", "content": 0.0016718130791559815, "timestamp": "2025-09-10 02:22:56.370979", "step": 3375, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-10 02:22:56.408658", "step": 3375, "epoch": 2 }, { "type": "loss", "content": 0.012171820737421513, "timestamp": "2025-09-10 02:22:56.445450", "step": 3376, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:22:56.475527", "step": 3376, "epoch": 2 }, { "type": "loss", "content": 0.001996230101212859, "timestamp": "2025-09-10 02:22:56.477359", "step": 3377, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 144 ], "flops": 4271696270016 }, "timestamp": "2025-09-10 02:22:56.506870", "step": 3377, "epoch": 2 }, { "type": "loss", "content": 0.00892335269600153, "timestamp": "2025-09-10 02:22:56.509304", "step": 3378, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:22:56.539112", "step": 3378, "epoch": 2 }, { "type": "loss", "content": 0.013574354350566864, "timestamp": "2025-09-10 02:22:56.546166", "step": 3379, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:22:56.577810", "step": 3379, "epoch": 2 }, { "type": "loss", "content": 0.010744870640337467, "timestamp": "2025-09-10 02:22:56.611237", "step": 3380, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-10 02:22:56.651979", "step": 3380, "epoch": 2 }, { "type": "loss", "content": 0.012002465315163136, "timestamp": "2025-09-10 02:22:56.665283", "step": 3381, "epoch": 2 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 736 ], "batch_size": 8, "flops": 14554433988352 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 480 ], "batch_size": 8, "flops": 9492022189824 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 544 ], "batch_size": 8, "flops": 10757625139456 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 624 ], "batch_size": 8, "flops": 12339628826496 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 576 ], "batch_size": 8, "flops": 11390426614272 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 432 ], "batch_size": 8, "flops": 8542819977600 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 432 ], "batch_size": 8, "flops": 8542819977600 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 128 ], "batch_size": 8, "flops": 2531205966848 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 544 ], "batch_size": 8, "flops": 10757625139456 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 656 ], "batch_size": 8, "flops": 12972430301312 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 128 ], "batch_size": 8, "flops": 2531205966848 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 464 ], "batch_size": 8, "flops": 9175621452416 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 560 ], "batch_size": 8, "flops": 11074025876864 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 576 ], "batch_size": 8, "flops": 11390426614272 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 1168 ], "batch_size": 8, "flops": 23097253898368 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 640 ], "batch_size": 8, "flops": 12656029563904 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 6, 208 ], "batch_size": 8, "flops": 4113209653888 } ], "timestamp": "2025-09-10 02:23:06.754647", "step": 3381, "epoch": 2 }, { "type": "pplx", "content": 19503867.643994175, "timestamp": "2025-09-10 02:23:06.757515", "step": 3381, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-10 02:23:06.794649", "step": 3381, "epoch": 2 }, { "type": "loss", "content": 0.004555174149572849, "timestamp": "2025-09-10 02:23:06.810538", "step": 3382, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:23:06.845093", "step": 3382, "epoch": 2 }, { "type": "loss", "content": 0.012583248317241669, "timestamp": "2025-09-10 02:23:06.858473", "step": 3383, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:23:06.890181", "step": 3383, "epoch": 2 }, { "type": "loss", "content": 0.03392893821001053, "timestamp": "2025-09-10 02:23:06.922817", "step": 3384, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-10 02:23:06.957562", "step": 3384, "epoch": 2 }, { "type": "loss", "content": 0.004845472984015942, "timestamp": "2025-09-10 02:23:06.970855", "step": 3385, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:23:07.005311", "step": 3385, "epoch": 2 }, { "type": "loss", "content": 0.0021397171076387167, "timestamp": "2025-09-10 02:23:07.015713", "step": 3386, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:23:07.050165", "step": 3386, "epoch": 2 }, { "type": "loss", "content": 0.012018001638352871, "timestamp": "2025-09-10 02:23:07.059697", "step": 3387, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:23:07.092906", "step": 3387, "epoch": 2 }, { "type": "loss", "content": 0.004577454179525375, "timestamp": "2025-09-10 02:23:07.117881", "step": 3388, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:23:07.149652", "step": 3388, "epoch": 2 }, { "type": "loss", "content": 0.010535781271755695, "timestamp": "2025-09-10 02:23:07.151782", "step": 3389, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:23:07.183243", "step": 3389, "epoch": 2 }, { "type": "loss", "content": 0.005816023796796799, "timestamp": "2025-09-10 02:23:07.190017", "step": 3390, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:23:07.221365", "step": 3390, "epoch": 2 }, { "type": "loss", "content": 0.00614953925833106, "timestamp": "2025-09-10 02:23:07.223727", "step": 3391, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:23:07.255452", "step": 3391, "epoch": 2 }, { "type": "loss", "content": 0.003558420343324542, "timestamp": "2025-09-10 02:23:07.283727", "step": 3392, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:23:07.314913", "step": 3392, "epoch": 2 }, { "type": "loss", "content": 0.005937974434345961, "timestamp": "2025-09-10 02:23:07.319177", "step": 3393, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:23:07.351187", "step": 3393, "epoch": 2 }, { "type": "loss", "content": 0.01408356986939907, "timestamp": "2025-09-10 02:23:07.363532", "step": 3394, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:23:07.394337", "step": 3394, "epoch": 2 }, { "type": "loss", "content": 0.0014379842905327678, "timestamp": "2025-09-10 02:23:07.398046", "step": 3395, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:23:07.429030", "step": 3395, "epoch": 2 }, { "type": "loss", "content": 0.0067471894435584545, "timestamp": "2025-09-10 02:23:07.454158", "step": 3396, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:23:07.486942", "step": 3396, "epoch": 2 }, { "type": "loss", "content": 0.013919052667915821, "timestamp": "2025-09-10 02:23:07.495675", "step": 3397, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-10 02:23:07.530583", "step": 3397, "epoch": 2 }, { "type": "loss", "content": 0.0027160770259797573, "timestamp": "2025-09-10 02:23:07.544500", "step": 3398, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:23:07.577910", "step": 3398, "epoch": 2 }, { "type": "loss", "content": 0.004795930348336697, "timestamp": "2025-09-10 02:23:07.587618", "step": 3399, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:23:07.619070", "step": 3399, "epoch": 2 }, { "type": "loss", "content": 0.005166996270418167, "timestamp": "2025-09-10 02:23:07.647255", "step": 3400, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:23:07.689699", "step": 3400, "epoch": 2 }, { "type": "loss", "content": 0.0037923615891486406, "timestamp": "2025-09-10 02:23:07.694920", "step": 3401, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:23:07.725938", "step": 3401, "epoch": 2 }, { "type": "loss", "content": 0.003060044953599572, "timestamp": "2025-09-10 02:23:07.737891", "step": 3402, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 432 ], "flops": 12814563338304 }, "timestamp": "2025-09-10 02:23:07.778578", "step": 3402, "epoch": 2 }, { "type": "loss", "content": 0.001733014010824263, "timestamp": "2025-09-10 02:23:07.794729", "step": 3403, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:23:07.825397", "step": 3403, "epoch": 2 }, { "type": "loss", "content": 0.0021855314262211323, "timestamp": "2025-09-10 02:23:07.853881", "step": 3404, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:23:07.887128", "step": 3404, "epoch": 2 }, { "type": "loss", "content": 0.03904338553547859, "timestamp": "2025-09-10 02:23:07.891696", "step": 3405, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:23:07.922344", "step": 3405, "epoch": 2 }, { "type": "loss", "content": 0.007601436227560043, "timestamp": "2025-09-10 02:23:07.934606", "step": 3406, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:23:07.966596", "step": 3406, "epoch": 2 }, { "type": "loss", "content": 0.009114629589021206, "timestamp": "2025-09-10 02:23:07.973986", "step": 3407, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:23:08.005183", "step": 3407, "epoch": 2 }, { "type": "loss", "content": 0.021965792402625084, "timestamp": "2025-09-10 02:23:08.036150", "step": 3408, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:23:08.068219", "step": 3408, "epoch": 2 }, { "type": "loss", "content": 0.007908275350928307, "timestamp": "2025-09-10 02:23:08.081050", "step": 3409, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:23:08.112268", "step": 3409, "epoch": 2 }, { "type": "loss", "content": 0.009908582083880901, "timestamp": "2025-09-10 02:23:08.116513", "step": 3410, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:23:08.150016", "step": 3410, "epoch": 2 }, { "type": "loss", "content": 0.006652395240962505, "timestamp": "2025-09-10 02:23:08.163360", "step": 3411, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:23:08.195014", "step": 3411, "epoch": 2 }, { "type": "loss", "content": 0.005871969740837812, "timestamp": "2025-09-10 02:23:08.225585", "step": 3412, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:23:08.257697", "step": 3412, "epoch": 2 }, { "type": "loss", "content": 0.007932118140161037, "timestamp": "2025-09-10 02:23:08.262776", "step": 3413, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:23:08.294989", "step": 3413, "epoch": 2 }, { "type": "loss", "content": 0.011967500671744347, "timestamp": "2025-09-10 02:23:08.302832", "step": 3414, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:23:08.334066", "step": 3414, "epoch": 2 }, { "type": "loss", "content": 0.0017909369198605418, "timestamp": "2025-09-10 02:23:08.341717", "step": 3415, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:23:08.372844", "step": 3415, "epoch": 2 }, { "type": "loss", "content": 0.007196805439889431, "timestamp": "2025-09-10 02:23:08.401415", "step": 3416, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:23:08.432415", "step": 3416, "epoch": 2 }, { "type": "loss", "content": 0.005088069010525942, "timestamp": "2025-09-10 02:23:08.437571", "step": 3417, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:23:08.468347", "step": 3417, "epoch": 2 }, { "type": "loss", "content": 0.011828926391899586, "timestamp": "2025-09-10 02:23:08.472861", "step": 3418, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:23:08.507358", "step": 3418, "epoch": 2 }, { "type": "loss", "content": 0.0015730817103758454, "timestamp": "2025-09-10 02:23:08.520988", "step": 3419, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:23:08.553080", "step": 3419, "epoch": 2 }, { "type": "loss", "content": 0.010220969095826149, "timestamp": "2025-09-10 02:23:08.583498", "step": 3420, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 480 ], "flops": 14238374516352 }, "timestamp": "2025-09-10 02:23:08.623942", "step": 3420, "epoch": 2 }, { "type": "loss", "content": 0.0031598855275660753, "timestamp": "2025-09-10 02:23:08.640953", "step": 3421, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:23:08.672646", "step": 3421, "epoch": 2 }, { "type": "loss", "content": 0.028410514816641808, "timestamp": "2025-09-10 02:23:08.679740", "step": 3422, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:23:08.710963", "step": 3422, "epoch": 2 }, { "type": "loss", "content": 0.003635372733697295, "timestamp": "2025-09-10 02:23:08.717809", "step": 3423, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:23:08.748767", "step": 3423, "epoch": 2 }, { "type": "loss", "content": 0.010761960409581661, "timestamp": "2025-09-10 02:23:08.776510", "step": 3424, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:23:08.807771", "step": 3424, "epoch": 2 }, { "type": "loss", "content": 0.0029311534017324448, "timestamp": "2025-09-10 02:23:08.815772", "step": 3425, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:23:08.846343", "step": 3425, "epoch": 2 }, { "type": "loss", "content": 0.005955227185040712, "timestamp": "2025-09-10 02:23:08.853700", "step": 3426, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:23:08.887227", "step": 3426, "epoch": 2 }, { "type": "loss", "content": 0.0019082642393186688, "timestamp": "2025-09-10 02:23:08.894193", "step": 3427, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:23:08.927241", "step": 3427, "epoch": 2 }, { "type": "loss", "content": 0.036622676998376846, "timestamp": "2025-09-10 02:23:08.958448", "step": 3428, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:23:08.991248", "step": 3428, "epoch": 2 }, { "type": "loss", "content": 0.016960853710770607, "timestamp": "2025-09-10 02:23:08.996726", "step": 3429, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:23:09.026889", "step": 3429, "epoch": 2 }, { "type": "loss", "content": 0.013002237305045128, "timestamp": "2025-09-10 02:23:09.034596", "step": 3430, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:23:09.065070", "step": 3430, "epoch": 2 }, { "type": "loss", "content": 0.008839133195579052, "timestamp": "2025-09-10 02:23:09.071772", "step": 3431, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:23:09.104799", "step": 3431, "epoch": 2 }, { "type": "loss", "content": 0.01928016170859337, "timestamp": "2025-09-10 02:23:09.132528", "step": 3432, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:23:09.163379", "step": 3432, "epoch": 2 }, { "type": "loss", "content": 0.0017680247547104955, "timestamp": "2025-09-10 02:23:09.165668", "step": 3433, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:23:09.195755", "step": 3433, "epoch": 2 }, { "type": "loss", "content": 0.0012528672814369202, "timestamp": "2025-09-10 02:23:09.198349", "step": 3434, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:23:09.228746", "step": 3434, "epoch": 2 }, { "type": "loss", "content": 0.0041799359023571014, "timestamp": "2025-09-10 02:23:09.235932", "step": 3435, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:23:09.266783", "step": 3435, "epoch": 2 }, { "type": "loss", "content": 0.00306068011559546, "timestamp": "2025-09-10 02:23:09.295547", "step": 3436, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:23:09.326486", "step": 3436, "epoch": 2 }, { "type": "loss", "content": 0.0031013197731226683, "timestamp": "2025-09-10 02:23:09.331936", "step": 3437, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:23:09.362497", "step": 3437, "epoch": 2 }, { "type": "loss", "content": 0.001428862102329731, "timestamp": "2025-09-10 02:23:09.369800", "step": 3438, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-10 02:23:09.404365", "step": 3438, "epoch": 2 }, { "type": "loss", "content": 0.0005541969439946115, "timestamp": "2025-09-10 02:23:09.418183", "step": 3439, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:23:09.449030", "step": 3439, "epoch": 2 }, { "type": "loss", "content": 0.004622712731361389, "timestamp": "2025-09-10 02:23:09.478004", "step": 3440, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:23:09.509849", "step": 3440, "epoch": 2 }, { "type": "loss", "content": 0.0019380019512027502, "timestamp": "2025-09-10 02:23:09.514406", "step": 3441, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:23:09.545872", "step": 3441, "epoch": 2 }, { "type": "loss", "content": 0.003715726314112544, "timestamp": "2025-09-10 02:23:09.556902", "step": 3442, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:23:09.588546", "step": 3442, "epoch": 2 }, { "type": "loss", "content": 0.006264827214181423, "timestamp": "2025-09-10 02:23:09.595030", "step": 3443, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:23:09.627432", "step": 3443, "epoch": 2 }, { "type": "loss", "content": 0.004857002291828394, "timestamp": "2025-09-10 02:23:09.658245", "step": 3444, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:23:09.693492", "step": 3444, "epoch": 2 }, { "type": "loss", "content": 0.002044878900051117, "timestamp": "2025-09-10 02:23:09.700332", "step": 3445, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:23:09.733066", "step": 3445, "epoch": 2 }, { "type": "loss", "content": 0.04166651517152786, "timestamp": "2025-09-10 02:23:09.739766", "step": 3446, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:23:09.772160", "step": 3446, "epoch": 2 }, { "type": "loss", "content": 0.0031065084040164948, "timestamp": "2025-09-10 02:23:09.782139", "step": 3447, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:23:09.816534", "step": 3447, "epoch": 2 }, { "type": "loss", "content": 0.0012844757875427604, "timestamp": "2025-09-10 02:23:09.843914", "step": 3448, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:23:09.875547", "step": 3448, "epoch": 2 }, { "type": "loss", "content": 0.0019296734826639295, "timestamp": "2025-09-10 02:23:09.883584", "step": 3449, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:23:09.916452", "step": 3449, "epoch": 2 }, { "type": "loss", "content": 0.0020907416474074125, "timestamp": "2025-09-10 02:23:09.927687", "step": 3450, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:23:09.960487", "step": 3450, "epoch": 2 }, { "type": "loss", "content": 0.002724104793742299, "timestamp": "2025-09-10 02:23:09.967106", "step": 3451, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:23:09.999304", "step": 3451, "epoch": 2 }, { "type": "loss", "content": 0.012764266692101955, "timestamp": "2025-09-10 02:23:10.029740", "step": 3452, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:23:10.061699", "step": 3452, "epoch": 2 }, { "type": "loss", "content": 0.005641660653054714, "timestamp": "2025-09-10 02:23:10.063917", "step": 3453, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:23:10.096530", "step": 3453, "epoch": 2 }, { "type": "loss", "content": 0.003335190238431096, "timestamp": "2025-09-10 02:23:10.102996", "step": 3454, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:23:10.145804", "step": 3454, "epoch": 2 }, { "type": "loss", "content": 0.0014257727889344096, "timestamp": "2025-09-10 02:23:10.157428", "step": 3455, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-10 02:23:10.196714", "step": 3455, "epoch": 2 }, { "type": "loss", "content": 0.005661274306476116, "timestamp": "2025-09-10 02:23:10.233271", "step": 3456, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:23:10.269812", "step": 3456, "epoch": 2 }, { "type": "loss", "content": 0.0007868251414038241, "timestamp": "2025-09-10 02:23:10.272408", "step": 3457, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:23:10.311148", "step": 3457, "epoch": 2 }, { "type": "loss", "content": 0.0019356502452865243, "timestamp": "2025-09-10 02:23:10.318900", "step": 3458, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:23:10.349817", "step": 3458, "epoch": 2 }, { "type": "loss", "content": 0.006856503430753946, "timestamp": "2025-09-10 02:23:10.357731", "step": 3459, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:23:10.394105", "step": 3459, "epoch": 2 }, { "type": "loss", "content": 0.0010916010942310095, "timestamp": "2025-09-10 02:23:10.428313", "step": 3460, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:23:10.463463", "step": 3460, "epoch": 2 }, { "type": "loss", "content": 0.013903248123824596, "timestamp": "2025-09-10 02:23:10.469270", "step": 3461, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:23:10.502983", "step": 3461, "epoch": 2 }, { "type": "loss", "content": 0.002239000052213669, "timestamp": "2025-09-10 02:23:10.506401", "step": 3462, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:23:10.541230", "step": 3462, "epoch": 2 }, { "type": "loss", "content": 0.0007613528869114816, "timestamp": "2025-09-10 02:23:10.543512", "step": 3463, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:23:10.581849", "step": 3463, "epoch": 2 }, { "type": "loss", "content": 0.0012689571594819427, "timestamp": "2025-09-10 02:23:10.616370", "step": 3464, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:23:10.663739", "step": 3464, "epoch": 2 }, { "type": "loss", "content": 0.0037759561091661453, "timestamp": "2025-09-10 02:23:10.671022", "step": 3465, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:23:10.714116", "step": 3465, "epoch": 2 }, { "type": "loss", "content": 0.0006281206151470542, "timestamp": "2025-09-10 02:23:10.719764", "step": 3466, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:23:10.752091", "step": 3466, "epoch": 2 }, { "type": "loss", "content": 0.000767476565670222, "timestamp": "2025-09-10 02:23:10.759522", "step": 3467, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:23:10.793859", "step": 3467, "epoch": 2 }, { "type": "loss", "content": 0.000946753949392587, "timestamp": "2025-09-10 02:23:10.819850", "step": 3468, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:23:10.850932", "step": 3468, "epoch": 2 }, { "type": "loss", "content": 0.014056609943509102, "timestamp": "2025-09-10 02:23:10.853224", "step": 3469, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:23:10.886985", "step": 3469, "epoch": 2 }, { "type": "loss", "content": 0.0004704651073552668, "timestamp": "2025-09-10 02:23:10.894094", "step": 3470, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:23:10.926723", "step": 3470, "epoch": 2 }, { "type": "loss", "content": 0.0011611180379986763, "timestamp": "2025-09-10 02:23:10.933919", "step": 3471, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:23:10.972466", "step": 3471, "epoch": 2 }, { "type": "loss", "content": 0.004651397932320833, "timestamp": "2025-09-10 02:23:11.000688", "step": 3472, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:23:11.033209", "step": 3472, "epoch": 2 }, { "type": "loss", "content": 0.0010988789144903421, "timestamp": "2025-09-10 02:23:11.038778", "step": 3473, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:23:11.069426", "step": 3473, "epoch": 2 }, { "type": "loss", "content": 0.001688135089352727, "timestamp": "2025-09-10 02:23:11.073789", "step": 3474, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:23:11.105988", "step": 3474, "epoch": 2 }, { "type": "loss", "content": 0.008201858960092068, "timestamp": "2025-09-10 02:23:11.110507", "step": 3475, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:23:11.142400", "step": 3475, "epoch": 2 }, { "type": "loss", "content": 0.0017634114483371377, "timestamp": "2025-09-10 02:23:11.167876", "step": 3476, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:23:11.198653", "step": 3476, "epoch": 2 }, { "type": "loss", "content": 0.002664331579580903, "timestamp": "2025-09-10 02:23:11.204161", "step": 3477, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:23:11.235062", "step": 3477, "epoch": 2 }, { "type": "loss", "content": 0.0006722270627506077, "timestamp": "2025-09-10 02:23:11.242056", "step": 3478, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:23:11.272417", "step": 3478, "epoch": 2 }, { "type": "loss", "content": 0.0010332902893424034, "timestamp": "2025-09-10 02:23:11.276778", "step": 3479, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:23:11.308084", "step": 3479, "epoch": 2 }, { "type": "loss", "content": 0.0014329560799524188, "timestamp": "2025-09-10 02:23:11.337032", "step": 3480, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:23:11.367640", "step": 3480, "epoch": 2 }, { "type": "loss", "content": 0.009547821246087551, "timestamp": "2025-09-10 02:23:11.373296", "step": 3481, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:23:11.406835", "step": 3481, "epoch": 2 }, { "type": "loss", "content": 0.015632616356015205, "timestamp": "2025-09-10 02:23:11.420229", "step": 3482, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:23:11.451294", "step": 3482, "epoch": 2 }, { "type": "loss", "content": 0.0008822629461064935, "timestamp": "2025-09-10 02:23:11.458362", "step": 3483, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:23:11.507670", "step": 3483, "epoch": 2 }, { "type": "loss", "content": 0.024470632895827293, "timestamp": "2025-09-10 02:23:11.532646", "step": 3484, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:23:11.563413", "step": 3484, "epoch": 2 }, { "type": "loss", "content": 0.002709366148337722, "timestamp": "2025-09-10 02:23:11.568540", "step": 3485, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:23:11.602144", "step": 3485, "epoch": 2 }, { "type": "loss", "content": 0.0006141713238321245, "timestamp": "2025-09-10 02:23:11.615536", "step": 3486, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:23:11.646966", "step": 3486, "epoch": 2 }, { "type": "loss", "content": 0.00025127717526629567, "timestamp": "2025-09-10 02:23:11.654566", "step": 3487, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:23:11.689017", "step": 3487, "epoch": 2 }, { "type": "loss", "content": 0.0025279794353991747, "timestamp": "2025-09-10 02:23:11.720345", "step": 3488, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:23:11.751908", "step": 3488, "epoch": 2 }, { "type": "loss", "content": 0.0007817599689587951, "timestamp": "2025-09-10 02:23:11.754063", "step": 3489, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:23:11.785830", "step": 3489, "epoch": 2 }, { "type": "loss", "content": 0.0025948244147002697, "timestamp": "2025-09-10 02:23:11.792944", "step": 3490, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:23:11.823703", "step": 3490, "epoch": 2 }, { "type": "loss", "content": 0.0002477052912581712, "timestamp": "2025-09-10 02:23:11.830840", "step": 3491, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:23:11.861527", "step": 3491, "epoch": 2 }, { "type": "loss", "content": 0.020622704178094864, "timestamp": "2025-09-10 02:23:11.889421", "step": 3492, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:23:11.920080", "step": 3492, "epoch": 2 }, { "type": "loss", "content": 0.0009450044599361718, "timestamp": "2025-09-10 02:23:11.925297", "step": 3493, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-10 02:23:11.963272", "step": 3493, "epoch": 2 }, { "type": "loss", "content": 0.004593479912728071, "timestamp": "2025-09-10 02:23:11.978930", "step": 3494, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:23:12.011166", "step": 3494, "epoch": 2 }, { "type": "loss", "content": 0.0003686284471768886, "timestamp": "2025-09-10 02:23:12.015801", "step": 3495, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:23:12.051195", "step": 3495, "epoch": 2 }, { "type": "loss", "content": 0.0008759652846492827, "timestamp": "2025-09-10 02:23:12.085794", "step": 3496, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:23:12.116178", "step": 3496, "epoch": 2 }, { "type": "loss", "content": 0.002225195523351431, "timestamp": "2025-09-10 02:23:12.118332", "step": 3497, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:23:12.148949", "step": 3497, "epoch": 2 }, { "type": "loss", "content": 0.0005732810823246837, "timestamp": "2025-09-10 02:23:12.156800", "step": 3498, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:23:12.188215", "step": 3498, "epoch": 2 }, { "type": "loss", "content": 0.0018855527741834521, "timestamp": "2025-09-10 02:23:12.199185", "step": 3499, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:23:12.230391", "step": 3499, "epoch": 2 }, { "type": "loss", "content": 0.00036894562072120607, "timestamp": "2025-09-10 02:23:12.258639", "step": 3500, "epoch": 2 }, { "type": "info", "content": "Checkpoint saved at step 3500", "timestamp": "2025-09-10 02:23:16.939336", "step": 3500, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:23:16.972263", "step": 3500, "epoch": 2 }, { "type": "loss", "content": 0.0015905782347545028, "timestamp": "2025-09-10 02:23:16.976744", "step": 3501, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:23:17.011135", "step": 3501, "epoch": 2 }, { "type": "loss", "content": 0.0011544699082151055, "timestamp": "2025-09-10 02:23:17.017976", "step": 3502, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:23:17.049103", "step": 3502, "epoch": 2 }, { "type": "loss", "content": 0.00034944407525472343, "timestamp": "2025-09-10 02:23:17.051563", "step": 3503, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:23:17.081918", "step": 3503, "epoch": 2 }, { "type": "loss", "content": 0.00036820146488025784, "timestamp": "2025-09-10 02:23:17.109738", "step": 3504, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:23:17.142389", "step": 3504, "epoch": 2 }, { "type": "loss", "content": 0.00649291044101119, "timestamp": "2025-09-10 02:23:17.144832", "step": 3505, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:23:17.176811", "step": 3505, "epoch": 2 }, { "type": "loss", "content": 0.009862485341727734, "timestamp": "2025-09-10 02:23:17.184533", "step": 3506, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:23:17.215687", "step": 3506, "epoch": 2 }, { "type": "loss", "content": 0.0013766074553132057, "timestamp": "2025-09-10 02:23:17.223525", "step": 3507, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:23:17.254771", "step": 3507, "epoch": 2 }, { "type": "loss", "content": 0.004568330943584442, "timestamp": "2025-09-10 02:23:17.283323", "step": 3508, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:23:17.318817", "step": 3508, "epoch": 2 }, { "type": "loss", "content": 0.012078741565346718, "timestamp": "2025-09-10 02:23:17.328682", "step": 3509, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:23:17.359784", "step": 3509, "epoch": 2 }, { "type": "loss", "content": 0.0004991721361875534, "timestamp": "2025-09-10 02:23:17.367115", "step": 3510, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:23:17.397399", "step": 3510, "epoch": 2 }, { "type": "loss", "content": 0.0006359159597195685, "timestamp": "2025-09-10 02:23:17.404120", "step": 3511, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:23:17.435258", "step": 3511, "epoch": 2 }, { "type": "loss", "content": 0.0014153111260384321, "timestamp": "2025-09-10 02:23:17.466397", "step": 3512, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:23:17.498228", "step": 3512, "epoch": 2 }, { "type": "loss", "content": 0.0020929204765707254, "timestamp": "2025-09-10 02:23:17.503563", "step": 3513, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:23:17.533849", "step": 3513, "epoch": 2 }, { "type": "loss", "content": 0.000537493615411222, "timestamp": "2025-09-10 02:23:17.537962", "step": 3514, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:23:17.568274", "step": 3514, "epoch": 2 }, { "type": "loss", "content": 0.0007732919184491038, "timestamp": "2025-09-10 02:23:17.575550", "step": 3515, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:23:17.610571", "step": 3515, "epoch": 2 }, { "type": "loss", "content": 0.0020853166934102774, "timestamp": "2025-09-10 02:23:17.638308", "step": 3516, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:23:17.670249", "step": 3516, "epoch": 2 }, { "type": "loss", "content": 0.008857056498527527, "timestamp": "2025-09-10 02:23:17.674492", "step": 3517, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:23:17.706110", "step": 3517, "epoch": 2 }, { "type": "loss", "content": 0.001030710176564753, "timestamp": "2025-09-10 02:23:17.716250", "step": 3518, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:23:17.751247", "step": 3518, "epoch": 2 }, { "type": "loss", "content": 0.0005307839601300657, "timestamp": "2025-09-10 02:23:17.759184", "step": 3519, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:23:17.790289", "step": 3519, "epoch": 2 }, { "type": "loss", "content": 0.003780403407290578, "timestamp": "2025-09-10 02:23:17.819065", "step": 3520, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:23:17.849819", "step": 3520, "epoch": 2 }, { "type": "loss", "content": 0.0005517909303307533, "timestamp": "2025-09-10 02:23:17.852213", "step": 3521, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:23:17.883587", "step": 3521, "epoch": 2 }, { "type": "loss", "content": 0.0004757777787744999, "timestamp": "2025-09-10 02:23:17.891136", "step": 3522, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:23:17.922466", "step": 3522, "epoch": 2 }, { "type": "loss", "content": 0.000609197246376425, "timestamp": "2025-09-10 02:23:17.927193", "step": 3523, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:23:17.958742", "step": 3523, "epoch": 2 }, { "type": "loss", "content": 0.0007339877774938941, "timestamp": "2025-09-10 02:23:17.987436", "step": 3524, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:23:18.020103", "step": 3524, "epoch": 2 }, { "type": "loss", "content": 0.0018651520367711782, "timestamp": "2025-09-10 02:23:18.028154", "step": 3525, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-10 02:23:18.066961", "step": 3525, "epoch": 2 }, { "type": "loss", "content": 0.003974412567913532, "timestamp": "2025-09-10 02:23:18.082846", "step": 3526, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:23:18.114410", "step": 3526, "epoch": 2 }, { "type": "loss", "content": 0.0008718844619579613, "timestamp": "2025-09-10 02:23:18.118537", "step": 3527, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:23:18.150124", "step": 3527, "epoch": 2 }, { "type": "loss", "content": 0.00030707629048265517, "timestamp": "2025-09-10 02:23:18.178486", "step": 3528, "epoch": 2 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 736 ], "batch_size": 8, "flops": 14554433988352 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 480 ], "batch_size": 8, "flops": 9492022189824 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 544 ], "batch_size": 8, "flops": 10757625139456 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 624 ], "batch_size": 8, "flops": 12339628826496 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 576 ], "batch_size": 8, "flops": 11390426614272 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 432 ], "batch_size": 8, "flops": 8542819977600 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 432 ], "batch_size": 8, "flops": 8542819977600 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 128 ], "batch_size": 8, "flops": 2531205966848 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 544 ], "batch_size": 8, "flops": 10757625139456 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 656 ], "batch_size": 8, "flops": 12972430301312 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 128 ], "batch_size": 8, "flops": 2531205966848 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 464 ], "batch_size": 8, "flops": 9175621452416 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 560 ], "batch_size": 8, "flops": 11074025876864 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 576 ], "batch_size": 8, "flops": 11390426614272 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 1168 ], "batch_size": 8, "flops": 23097253898368 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 640 ], "batch_size": 8, "flops": 12656029563904 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 6, 208 ], "batch_size": 8, "flops": 4113209653888 } ], "timestamp": "2025-09-10 02:23:28.262537", "step": 3528, "epoch": 2 }, { "type": "pplx", "content": 22053610.470987573, "timestamp": "2025-09-10 02:23:28.265279", "step": 3528, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:23:28.295598", "step": 3528, "epoch": 2 }, { "type": "loss", "content": 0.00019998988136649132, "timestamp": "2025-09-10 02:23:28.303559", "step": 3529, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:23:28.335417", "step": 3529, "epoch": 2 }, { "type": "loss", "content": 0.013725848868489265, "timestamp": "2025-09-10 02:23:28.345349", "step": 3530, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:23:28.376647", "step": 3530, "epoch": 2 }, { "type": "loss", "content": 0.001963739050552249, "timestamp": "2025-09-10 02:23:28.381047", "step": 3531, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 432 ], "flops": 12814563338304 }, "timestamp": "2025-09-10 02:23:28.419680", "step": 3531, "epoch": 2 }, { "type": "loss", "content": 0.01121476013213396, "timestamp": "2025-09-10 02:23:28.456740", "step": 3532, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:23:28.487527", "step": 3532, "epoch": 2 }, { "type": "loss", "content": 0.00019301848078612238, "timestamp": "2025-09-10 02:23:28.496081", "step": 3533, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:23:28.527158", "step": 3533, "epoch": 2 }, { "type": "loss", "content": 0.0023806169629096985, "timestamp": "2025-09-10 02:23:28.539780", "step": 3534, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:23:28.570913", "step": 3534, "epoch": 2 }, { "type": "loss", "content": 0.0010761814191937447, "timestamp": "2025-09-10 02:23:28.575368", "step": 3535, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:23:28.606462", "step": 3535, "epoch": 2 }, { "type": "loss", "content": 0.00018318326328881085, "timestamp": "2025-09-10 02:23:28.635024", "step": 3536, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 144 ], "flops": 4271696270016 }, "timestamp": "2025-09-10 02:23:28.665441", "step": 3536, "epoch": 2 }, { "type": "loss", "content": 0.0066347974352538586, "timestamp": "2025-09-10 02:23:28.667441", "step": 3537, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:23:28.698524", "step": 3537, "epoch": 2 }, { "type": "loss", "content": 0.00013012031558901072, "timestamp": "2025-09-10 02:23:28.703094", "step": 3538, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 512 ], "flops": 15187581968384 }, "timestamp": "2025-09-10 02:23:28.747230", "step": 3538, "epoch": 2 }, { "type": "loss", "content": 0.00027974756085313857, "timestamp": "2025-09-10 02:23:28.764985", "step": 3539, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:23:28.795795", "step": 3539, "epoch": 2 }, { "type": "loss", "content": 0.003288812702521682, "timestamp": "2025-09-10 02:23:28.827880", "step": 3540, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:23:28.860596", "step": 3540, "epoch": 2 }, { "type": "loss", "content": 0.0002967107866425067, "timestamp": "2025-09-10 02:23:28.862803", "step": 3541, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:23:28.893979", "step": 3541, "epoch": 2 }, { "type": "loss", "content": 0.0013638153905048966, "timestamp": "2025-09-10 02:23:28.900819", "step": 3542, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:23:28.931708", "step": 3542, "epoch": 2 }, { "type": "loss", "content": 0.0009955601999536157, "timestamp": "2025-09-10 02:23:28.938584", "step": 3543, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:23:28.970094", "step": 3543, "epoch": 2 }, { "type": "loss", "content": 0.005556150339543819, "timestamp": "2025-09-10 02:23:29.002133", "step": 3544, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:23:29.032247", "step": 3544, "epoch": 2 }, { "type": "loss", "content": 0.0003385456802789122, "timestamp": "2025-09-10 02:23:29.034306", "step": 3545, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:23:29.064533", "step": 3545, "epoch": 2 }, { "type": "loss", "content": 0.0076200878247618675, "timestamp": "2025-09-10 02:23:29.071305", "step": 3546, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:23:29.101681", "step": 3546, "epoch": 2 }, { "type": "loss", "content": 0.006286826450377703, "timestamp": "2025-09-10 02:23:29.108661", "step": 3547, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:23:29.139842", "step": 3547, "epoch": 2 }, { "type": "loss", "content": 0.0003426824405323714, "timestamp": "2025-09-10 02:23:29.173020", "step": 3548, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:23:29.204891", "step": 3548, "epoch": 2 }, { "type": "loss", "content": 0.034861672669649124, "timestamp": "2025-09-10 02:23:29.209502", "step": 3549, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:23:29.251602", "step": 3549, "epoch": 2 }, { "type": "loss", "content": 0.0012050783261656761, "timestamp": "2025-09-10 02:23:29.265034", "step": 3550, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:23:29.301796", "step": 3550, "epoch": 2 }, { "type": "loss", "content": 0.0464249923825264, "timestamp": "2025-09-10 02:23:29.309590", "step": 3551, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 624 ], "flops": 18509808050496 }, "timestamp": "2025-09-10 02:23:29.366721", "step": 3551, "epoch": 2 }, { "type": "loss", "content": 0.006405732128769159, "timestamp": "2025-09-10 02:23:29.409390", "step": 3552, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:23:29.450769", "step": 3552, "epoch": 2 }, { "type": "loss", "content": 0.00038313533877953887, "timestamp": "2025-09-10 02:23:29.458913", "step": 3553, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:23:29.495471", "step": 3553, "epoch": 2 }, { "type": "loss", "content": 0.0009717740467749536, "timestamp": "2025-09-10 02:23:29.503209", "step": 3554, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-10 02:23:29.546360", "step": 3554, "epoch": 2 }, { "type": "loss", "content": 0.0007425061194226146, "timestamp": "2025-09-10 02:23:29.561963", "step": 3555, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:23:29.602489", "step": 3555, "epoch": 2 }, { "type": "loss", "content": 0.0007218411774374545, "timestamp": "2025-09-10 02:23:29.630453", "step": 3556, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:23:29.673053", "step": 3556, "epoch": 2 }, { "type": "loss", "content": 0.0036273004952818155, "timestamp": "2025-09-10 02:23:29.677334", "step": 3557, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:23:29.718240", "step": 3557, "epoch": 2 }, { "type": "loss", "content": 0.0006142216734588146, "timestamp": "2025-09-10 02:23:29.731639", "step": 3558, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 464 ], "flops": 13763770790336 }, "timestamp": "2025-09-10 02:23:29.781095", "step": 3558, "epoch": 2 }, { "type": "loss", "content": 0.0007438276661559939, "timestamp": "2025-09-10 02:23:29.798206", "step": 3559, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-10 02:23:29.840536", "step": 3559, "epoch": 2 }, { "type": "loss", "content": 0.00014973794168327004, "timestamp": "2025-09-10 02:23:29.875435", "step": 3560, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:23:29.908439", "step": 3560, "epoch": 2 }, { "type": "loss", "content": 0.0003213935415260494, "timestamp": "2025-09-10 02:23:29.913349", "step": 3561, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:23:29.947496", "step": 3561, "epoch": 2 }, { "type": "loss", "content": 0.0009177852771244943, "timestamp": "2025-09-10 02:23:29.959499", "step": 3562, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:23:29.991532", "step": 3562, "epoch": 2 }, { "type": "loss", "content": 0.0032881794031709433, "timestamp": "2025-09-10 02:23:30.002537", "step": 3563, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:23:30.033810", "step": 3563, "epoch": 2 }, { "type": "loss", "content": 0.004884254653006792, "timestamp": "2025-09-10 02:23:30.065793", "step": 3564, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:23:30.096997", "step": 3564, "epoch": 2 }, { "type": "loss", "content": 0.005217746831476688, "timestamp": "2025-09-10 02:23:30.106937", "step": 3565, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:23:30.137626", "step": 3565, "epoch": 2 }, { "type": "loss", "content": 0.002377876313403249, "timestamp": "2025-09-10 02:23:30.141679", "step": 3566, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:23:30.173799", "step": 3566, "epoch": 2 }, { "type": "loss", "content": 0.0286801066249609, "timestamp": "2025-09-10 02:23:30.181406", "step": 3567, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:23:30.218965", "step": 3567, "epoch": 2 }, { "type": "loss", "content": 0.025519220158457756, "timestamp": "2025-09-10 02:23:30.250158", "step": 3568, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:23:30.280235", "step": 3568, "epoch": 2 }, { "type": "loss", "content": 0.0003562222118489444, "timestamp": "2025-09-10 02:23:30.284802", "step": 3569, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:23:30.316472", "step": 3569, "epoch": 2 }, { "type": "loss", "content": 0.013461283408105373, "timestamp": "2025-09-10 02:23:30.323863", "step": 3570, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:23:30.355122", "step": 3570, "epoch": 2 }, { "type": "loss", "content": 0.00494401203468442, "timestamp": "2025-09-10 02:23:30.362015", "step": 3571, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:23:30.394352", "step": 3571, "epoch": 2 }, { "type": "loss", "content": 0.000439615425420925, "timestamp": "2025-09-10 02:23:30.425502", "step": 3572, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:23:30.456922", "step": 3572, "epoch": 2 }, { "type": "loss", "content": 0.0008407292771153152, "timestamp": "2025-09-10 02:23:30.464692", "step": 3573, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:23:30.495409", "step": 3573, "epoch": 2 }, { "type": "loss", "content": 0.0009364182478748262, "timestamp": "2025-09-10 02:23:30.499546", "step": 3574, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:23:30.531237", "step": 3574, "epoch": 2 }, { "type": "loss", "content": 0.026412170380353928, "timestamp": "2025-09-10 02:23:30.538705", "step": 3575, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:23:30.569767", "step": 3575, "epoch": 2 }, { "type": "loss", "content": 0.0028750034980475903, "timestamp": "2025-09-10 02:23:30.598405", "step": 3576, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:23:30.629371", "step": 3576, "epoch": 2 }, { "type": "loss", "content": 0.0007422365597449243, "timestamp": "2025-09-10 02:23:30.634494", "step": 3577, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:23:30.664788", "step": 3577, "epoch": 2 }, { "type": "loss", "content": 0.00027479632990434766, "timestamp": "2025-09-10 02:23:30.667506", "step": 3578, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:23:30.697388", "step": 3578, "epoch": 2 }, { "type": "loss", "content": 0.0005509444163180888, "timestamp": "2025-09-10 02:23:30.701644", "step": 3579, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:23:30.732510", "step": 3579, "epoch": 2 }, { "type": "loss", "content": 0.0011080257827416062, "timestamp": "2025-09-10 02:23:30.761032", "step": 3580, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:23:30.792233", "step": 3580, "epoch": 2 }, { "type": "loss", "content": 0.001185481552965939, "timestamp": "2025-09-10 02:23:30.802266", "step": 3581, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:23:30.832648", "step": 3581, "epoch": 2 }, { "type": "loss", "content": 0.0008745525847189128, "timestamp": "2025-09-10 02:23:30.837004", "step": 3582, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:23:30.870080", "step": 3582, "epoch": 2 }, { "type": "loss", "content": 0.021427616477012634, "timestamp": "2025-09-10 02:23:30.881842", "step": 3583, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:23:30.913345", "step": 3583, "epoch": 2 }, { "type": "loss", "content": 0.00033548500505276024, "timestamp": "2025-09-10 02:23:30.938697", "step": 3584, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:23:30.976287", "step": 3584, "epoch": 2 }, { "type": "loss", "content": 0.001324447919614613, "timestamp": "2025-09-10 02:23:30.979084", "step": 3585, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:23:31.014022", "step": 3585, "epoch": 2 }, { "type": "loss", "content": 0.027664339169859886, "timestamp": "2025-09-10 02:23:31.022956", "step": 3586, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:23:31.054239", "step": 3586, "epoch": 2 }, { "type": "loss", "content": 0.0006238113855943084, "timestamp": "2025-09-10 02:23:31.061602", "step": 3587, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:23:31.092272", "step": 3587, "epoch": 2 }, { "type": "loss", "content": 0.0007867095409892499, "timestamp": "2025-09-10 02:23:31.120230", "step": 3588, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:23:31.151875", "step": 3588, "epoch": 2 }, { "type": "loss", "content": 0.029622314497828484, "timestamp": "2025-09-10 02:23:31.161735", "step": 3589, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:23:31.193252", "step": 3589, "epoch": 2 }, { "type": "loss", "content": 0.00037610027357004583, "timestamp": "2025-09-10 02:23:31.200375", "step": 3590, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:23:31.230682", "step": 3590, "epoch": 2 }, { "type": "loss", "content": 0.002425598446279764, "timestamp": "2025-09-10 02:23:31.234856", "step": 3591, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:23:31.266565", "step": 3591, "epoch": 2 }, { "type": "loss", "content": 0.019675688818097115, "timestamp": "2025-09-10 02:23:31.294566", "step": 3592, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:23:31.326699", "step": 3592, "epoch": 2 }, { "type": "loss", "content": 0.0001952952443389222, "timestamp": "2025-09-10 02:23:31.331488", "step": 3593, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:23:31.362721", "step": 3593, "epoch": 2 }, { "type": "loss", "content": 0.0218330230563879, "timestamp": "2025-09-10 02:23:31.372828", "step": 3594, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:23:31.406850", "step": 3594, "epoch": 2 }, { "type": "loss", "content": 0.0041700261645019054, "timestamp": "2025-09-10 02:23:31.419032", "step": 3595, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:23:31.451196", "step": 3595, "epoch": 2 }, { "type": "loss", "content": 0.004490002058446407, "timestamp": "2025-09-10 02:23:31.483020", "step": 3596, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:23:31.514251", "step": 3596, "epoch": 2 }, { "type": "loss", "content": 0.002926964545622468, "timestamp": "2025-09-10 02:23:31.518753", "step": 3597, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:23:31.550875", "step": 3597, "epoch": 2 }, { "type": "loss", "content": 0.00023523984418716282, "timestamp": "2025-09-10 02:23:31.558371", "step": 3598, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:23:31.590223", "step": 3598, "epoch": 2 }, { "type": "loss", "content": 0.004813474602997303, "timestamp": "2025-09-10 02:23:31.597312", "step": 3599, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:23:31.627946", "step": 3599, "epoch": 2 }, { "type": "loss", "content": 0.00039707665564492345, "timestamp": "2025-09-10 02:23:31.655957", "step": 3600, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:23:31.688467", "step": 3600, "epoch": 2 }, { "type": "loss", "content": 0.000567035167478025, "timestamp": "2025-09-10 02:23:31.693782", "step": 3601, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:23:31.724428", "step": 3601, "epoch": 2 }, { "type": "loss", "content": 0.015649326145648956, "timestamp": "2025-09-10 02:23:31.731574", "step": 3602, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:23:31.767124", "step": 3602, "epoch": 2 }, { "type": "loss", "content": 0.01300052274018526, "timestamp": "2025-09-10 02:23:31.773836", "step": 3603, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:23:31.805427", "step": 3603, "epoch": 2 }, { "type": "loss", "content": 0.022090671584010124, "timestamp": "2025-09-10 02:23:31.833160", "step": 3604, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:23:31.865752", "step": 3604, "epoch": 2 }, { "type": "loss", "content": 0.002304330002516508, "timestamp": "2025-09-10 02:23:31.870759", "step": 3605, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:23:31.901258", "step": 3605, "epoch": 2 }, { "type": "loss", "content": 0.03670268505811691, "timestamp": "2025-09-10 02:23:31.905369", "step": 3606, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-10 02:23:31.943717", "step": 3606, "epoch": 2 }, { "type": "loss", "content": 0.0005911525222472847, "timestamp": "2025-09-10 02:23:31.959384", "step": 3607, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:23:31.992348", "step": 3607, "epoch": 2 }, { "type": "loss", "content": 0.0006495547713711858, "timestamp": "2025-09-10 02:23:32.023492", "step": 3608, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:23:32.054908", "step": 3608, "epoch": 2 }, { "type": "loss", "content": 0.0005558125558309257, "timestamp": "2025-09-10 02:23:32.059342", "step": 3609, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:23:32.090916", "step": 3609, "epoch": 2 }, { "type": "loss", "content": 0.0061562503688037395, "timestamp": "2025-09-10 02:23:32.102900", "step": 3610, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:23:32.134868", "step": 3610, "epoch": 2 }, { "type": "loss", "content": 0.008789503946900368, "timestamp": "2025-09-10 02:23:32.144774", "step": 3611, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:23:32.177089", "step": 3611, "epoch": 2 }, { "type": "loss", "content": 0.000540457374881953, "timestamp": "2025-09-10 02:23:32.204872", "step": 3612, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:23:32.236140", "step": 3612, "epoch": 2 }, { "type": "loss", "content": 0.0006599615444429219, "timestamp": "2025-09-10 02:23:32.240526", "step": 3613, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:23:32.272703", "step": 3613, "epoch": 2 }, { "type": "loss", "content": 0.002096888143569231, "timestamp": "2025-09-10 02:23:32.280357", "step": 3614, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:23:32.311532", "step": 3614, "epoch": 2 }, { "type": "loss", "content": 0.0028935037553310394, "timestamp": "2025-09-10 02:23:32.315934", "step": 3615, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:23:32.347081", "step": 3615, "epoch": 2 }, { "type": "loss", "content": 0.0018138455925509334, "timestamp": "2025-09-10 02:23:32.372354", "step": 3616, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:23:32.404503", "step": 3616, "epoch": 2 }, { "type": "loss", "content": 0.006520233117043972, "timestamp": "2025-09-10 02:23:32.409850", "step": 3617, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:23:32.445950", "step": 3617, "epoch": 2 }, { "type": "loss", "content": 0.0026026610285043716, "timestamp": "2025-09-10 02:23:32.459332", "step": 3618, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-10 02:23:32.499491", "step": 3618, "epoch": 2 }, { "type": "loss", "content": 0.011162899434566498, "timestamp": "2025-09-10 02:23:32.515406", "step": 3619, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:23:32.549195", "step": 3619, "epoch": 2 }, { "type": "loss", "content": 0.0006067942013032734, "timestamp": "2025-09-10 02:23:32.580873", "step": 3620, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:23:32.612421", "step": 3620, "epoch": 2 }, { "type": "loss", "content": 0.0009515011915937066, "timestamp": "2025-09-10 02:23:32.617085", "step": 3621, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:23:32.650398", "step": 3621, "epoch": 2 }, { "type": "loss", "content": 0.0030977034475654364, "timestamp": "2025-09-10 02:23:32.657133", "step": 3622, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:23:32.690597", "step": 3622, "epoch": 2 }, { "type": "loss", "content": 0.0023766574449837208, "timestamp": "2025-09-10 02:23:32.701178", "step": 3623, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:23:32.732882", "step": 3623, "epoch": 2 }, { "type": "loss", "content": 0.050299059599637985, "timestamp": "2025-09-10 02:23:32.766351", "step": 3624, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:23:32.798823", "step": 3624, "epoch": 2 }, { "type": "loss", "content": 0.0003462900349404663, "timestamp": "2025-09-10 02:23:32.806489", "step": 3625, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:23:32.841734", "step": 3625, "epoch": 2 }, { "type": "loss", "content": 0.03242403641343117, "timestamp": "2025-09-10 02:23:32.854324", "step": 3626, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:23:32.886966", "step": 3626, "epoch": 2 }, { "type": "loss", "content": 0.0009655548492446542, "timestamp": "2025-09-10 02:23:32.893816", "step": 3627, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:23:32.925570", "step": 3627, "epoch": 2 }, { "type": "loss", "content": 0.011085288599133492, "timestamp": "2025-09-10 02:23:32.957370", "step": 3628, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 512 ], "flops": 15187581968384 }, "timestamp": "2025-09-10 02:23:33.001453", "step": 3628, "epoch": 2 }, { "type": "loss", "content": 0.0011424163822084665, "timestamp": "2025-09-10 02:23:33.018706", "step": 3629, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-10 02:23:33.059054", "step": 3629, "epoch": 2 }, { "type": "loss", "content": 0.027174891903996468, "timestamp": "2025-09-10 02:23:33.074676", "step": 3630, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:23:33.112865", "step": 3630, "epoch": 2 }, { "type": "loss", "content": 0.009898961521685123, "timestamp": "2025-09-10 02:23:33.116705", "step": 3631, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:23:33.151599", "step": 3631, "epoch": 2 }, { "type": "loss", "content": 0.0001422611385351047, "timestamp": "2025-09-10 02:23:33.178550", "step": 3632, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:23:33.216383", "step": 3632, "epoch": 2 }, { "type": "loss", "content": 0.006423108279705048, "timestamp": "2025-09-10 02:23:33.218613", "step": 3633, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:23:33.250747", "step": 3633, "epoch": 2 }, { "type": "loss", "content": 0.0006503048934973776, "timestamp": "2025-09-10 02:23:33.254438", "step": 3634, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:23:33.287142", "step": 3634, "epoch": 2 }, { "type": "loss", "content": 0.02117828093469143, "timestamp": "2025-09-10 02:23:33.297447", "step": 3635, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:23:33.329371", "step": 3635, "epoch": 2 }, { "type": "loss", "content": 0.0025022621266543865, "timestamp": "2025-09-10 02:23:33.356913", "step": 3636, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:23:33.388951", "step": 3636, "epoch": 2 }, { "type": "loss", "content": 0.004223259165883064, "timestamp": "2025-09-10 02:23:33.391527", "step": 3637, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 144 ], "flops": 4271696270016 }, "timestamp": "2025-09-10 02:23:33.426342", "step": 3637, "epoch": 2 }, { "type": "loss", "content": 0.03953773155808449, "timestamp": "2025-09-10 02:23:33.429345", "step": 3638, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:23:33.460347", "step": 3638, "epoch": 2 }, { "type": "loss", "content": 0.025826627388596535, "timestamp": "2025-09-10 02:23:33.470332", "step": 3639, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-10 02:23:33.505344", "step": 3639, "epoch": 2 }, { "type": "loss", "content": 0.0006002942100167274, "timestamp": "2025-09-10 02:23:33.540029", "step": 3640, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:23:33.571550", "step": 3640, "epoch": 2 }, { "type": "loss", "content": 0.010516015812754631, "timestamp": "2025-09-10 02:23:33.576171", "step": 3641, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:23:33.606991", "step": 3641, "epoch": 2 }, { "type": "loss", "content": 0.0020751620177179575, "timestamp": "2025-09-10 02:23:33.613847", "step": 3642, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:23:33.645478", "step": 3642, "epoch": 2 }, { "type": "loss", "content": 0.0017879597144201398, "timestamp": "2025-09-10 02:23:33.657529", "step": 3643, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:23:33.689679", "step": 3643, "epoch": 2 }, { "type": "loss", "content": 0.0004607823502738029, "timestamp": "2025-09-10 02:23:33.722339", "step": 3644, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:23:33.753387", "step": 3644, "epoch": 2 }, { "type": "loss", "content": 0.012203511781990528, "timestamp": "2025-09-10 02:23:33.763904", "step": 3645, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:23:33.797518", "step": 3645, "epoch": 2 }, { "type": "loss", "content": 0.0037501014303416014, "timestamp": "2025-09-10 02:23:33.810878", "step": 3646, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:23:33.843012", "step": 3646, "epoch": 2 }, { "type": "loss", "content": 0.006922434084117413, "timestamp": "2025-09-10 02:23:33.849942", "step": 3647, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:23:33.880986", "step": 3647, "epoch": 2 }, { "type": "loss", "content": 0.0018579624593257904, "timestamp": "2025-09-10 02:23:33.908723", "step": 3648, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:23:33.939821", "step": 3648, "epoch": 2 }, { "type": "loss", "content": 0.0014378555351868272, "timestamp": "2025-09-10 02:23:33.943463", "step": 3649, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:23:33.975165", "step": 3649, "epoch": 2 }, { "type": "loss", "content": 0.004263308364897966, "timestamp": "2025-09-10 02:23:33.985314", "step": 3650, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:23:34.015655", "step": 3650, "epoch": 2 }, { "type": "loss", "content": 0.006144766230136156, "timestamp": "2025-09-10 02:23:34.026553", "step": 3651, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:23:34.057068", "step": 3651, "epoch": 2 }, { "type": "loss", "content": 0.0022812257520854473, "timestamp": "2025-09-10 02:23:34.080881", "step": 3652, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:23:34.111599", "step": 3652, "epoch": 2 }, { "type": "loss", "content": 0.013019556179642677, "timestamp": "2025-09-10 02:23:34.117102", "step": 3653, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-10 02:23:34.151180", "step": 3653, "epoch": 2 }, { "type": "loss", "content": 0.008377458900213242, "timestamp": "2025-09-10 02:23:34.164994", "step": 3654, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:23:34.195131", "step": 3654, "epoch": 2 }, { "type": "loss", "content": 0.003772433614358306, "timestamp": "2025-09-10 02:23:34.206283", "step": 3655, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:23:34.236552", "step": 3655, "epoch": 2 }, { "type": "loss", "content": 0.017940301448106766, "timestamp": "2025-09-10 02:23:34.262014", "step": 3656, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:23:34.294442", "step": 3656, "epoch": 2 }, { "type": "loss", "content": 0.0058494978584349155, "timestamp": "2025-09-10 02:23:34.298294", "step": 3657, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:23:34.330418", "step": 3657, "epoch": 2 }, { "type": "loss", "content": 0.02427353337407112, "timestamp": "2025-09-10 02:23:34.342087", "step": 3658, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 464 ], "flops": 13763770790336 }, "timestamp": "2025-09-10 02:23:34.383500", "step": 3658, "epoch": 2 }, { "type": "loss", "content": 0.0017965204315260053, "timestamp": "2025-09-10 02:23:34.400621", "step": 3659, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:23:34.431989", "step": 3659, "epoch": 2 }, { "type": "loss", "content": 0.0022874141577631235, "timestamp": "2025-09-10 02:23:34.459435", "step": 3660, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:23:34.490403", "step": 3660, "epoch": 2 }, { "type": "loss", "content": 0.001652199076488614, "timestamp": "2025-09-10 02:23:34.495126", "step": 3661, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-10 02:23:34.534987", "step": 3661, "epoch": 2 }, { "type": "loss", "content": 0.002010711934417486, "timestamp": "2025-09-10 02:23:34.550930", "step": 3662, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 144 ], "flops": 4271696270016 }, "timestamp": "2025-09-10 02:23:34.582182", "step": 3662, "epoch": 2 }, { "type": "loss", "content": 0.009694479405879974, "timestamp": "2025-09-10 02:23:34.584510", "step": 3663, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:23:34.616529", "step": 3663, "epoch": 2 }, { "type": "loss", "content": 0.007335420232266188, "timestamp": "2025-09-10 02:23:34.647418", "step": 3664, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:23:34.679314", "step": 3664, "epoch": 2 }, { "type": "loss", "content": 0.01509284321218729, "timestamp": "2025-09-10 02:23:34.691955", "step": 3665, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:23:34.722912", "step": 3665, "epoch": 2 }, { "type": "loss", "content": 0.03952633589506149, "timestamp": "2025-09-10 02:23:34.730783", "step": 3666, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:23:34.760989", "step": 3666, "epoch": 2 }, { "type": "loss", "content": 0.006663356442004442, "timestamp": "2025-09-10 02:23:34.765594", "step": 3667, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:23:34.796418", "step": 3667, "epoch": 2 }, { "type": "loss", "content": 0.018776053562760353, "timestamp": "2025-09-10 02:23:34.824940", "step": 3668, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:23:34.855613", "step": 3668, "epoch": 2 }, { "type": "loss", "content": 0.00801047496497631, "timestamp": "2025-09-10 02:23:34.863532", "step": 3669, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:23:34.894177", "step": 3669, "epoch": 2 }, { "type": "loss", "content": 0.03632910177111626, "timestamp": "2025-09-10 02:23:34.905046", "step": 3670, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:23:34.936404", "step": 3670, "epoch": 2 }, { "type": "loss", "content": 0.014353514648973942, "timestamp": "2025-09-10 02:23:34.948727", "step": 3671, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:23:34.980357", "step": 3671, "epoch": 2 }, { "type": "loss", "content": 0.00914282537996769, "timestamp": "2025-09-10 02:23:35.007911", "step": 3672, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:23:35.039344", "step": 3672, "epoch": 2 }, { "type": "loss", "content": 0.0020550028420984745, "timestamp": "2025-09-10 02:23:35.047649", "step": 3673, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-10 02:23:35.082542", "step": 3673, "epoch": 2 }, { "type": "loss", "content": 0.0013698196271434426, "timestamp": "2025-09-10 02:23:35.096409", "step": 3674, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 432 ], "flops": 12814563338304 }, "timestamp": "2025-09-10 02:23:35.135446", "step": 3674, "epoch": 2 }, { "type": "loss", "content": 0.0010654388461261988, "timestamp": "2025-09-10 02:23:35.151603", "step": 3675, "epoch": 2 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 736 ], "batch_size": 8, "flops": 14554433988352 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 480 ], "batch_size": 8, "flops": 9492022189824 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 544 ], "batch_size": 8, "flops": 10757625139456 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 624 ], "batch_size": 8, "flops": 12339628826496 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 576 ], "batch_size": 8, "flops": 11390426614272 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 432 ], "batch_size": 8, "flops": 8542819977600 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 432 ], "batch_size": 8, "flops": 8542819977600 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 128 ], "batch_size": 8, "flops": 2531205966848 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 544 ], "batch_size": 8, "flops": 10757625139456 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 656 ], "batch_size": 8, "flops": 12972430301312 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 128 ], "batch_size": 8, "flops": 2531205966848 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 464 ], "batch_size": 8, "flops": 9175621452416 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 560 ], "batch_size": 8, "flops": 11074025876864 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 576 ], "batch_size": 8, "flops": 11390426614272 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 1168 ], "batch_size": 8, "flops": 23097253898368 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 640 ], "batch_size": 8, "flops": 12656029563904 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 6, 208 ], "batch_size": 8, "flops": 4113209653888 } ], "timestamp": "2025-09-10 02:23:45.394972", "step": 3675, "epoch": 2 }, { "type": "pplx", "content": 21047841.64222782, "timestamp": "2025-09-10 02:23:45.398778", "step": 3675, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:23:45.430060", "step": 3675, "epoch": 2 }, { "type": "loss", "content": 0.0022460322361439466, "timestamp": "2025-09-10 02:23:45.456947", "step": 3676, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:23:45.488804", "step": 3676, "epoch": 2 }, { "type": "loss", "content": 0.00032661884324625134, "timestamp": "2025-09-10 02:23:45.498535", "step": 3677, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:23:45.531376", "step": 3677, "epoch": 2 }, { "type": "loss", "content": 0.0034290028270334005, "timestamp": "2025-09-10 02:23:45.538293", "step": 3678, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:23:45.569174", "step": 3678, "epoch": 2 }, { "type": "loss", "content": 0.001095048151910305, "timestamp": "2025-09-10 02:23:45.576755", "step": 3679, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:23:45.607998", "step": 3679, "epoch": 2 }, { "type": "loss", "content": 0.005293484777212143, "timestamp": "2025-09-10 02:23:45.641456", "step": 3680, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:23:45.673169", "step": 3680, "epoch": 2 }, { "type": "loss", "content": 0.005093970336019993, "timestamp": "2025-09-10 02:23:45.685830", "step": 3681, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:23:45.716501", "step": 3681, "epoch": 2 }, { "type": "loss", "content": 0.0007117848144844174, "timestamp": "2025-09-10 02:23:45.728651", "step": 3682, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:23:45.759255", "step": 3682, "epoch": 2 }, { "type": "loss", "content": 0.00283992663025856, "timestamp": "2025-09-10 02:23:45.770063", "step": 3683, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:23:45.803452", "step": 3683, "epoch": 2 }, { "type": "loss", "content": 0.029055939987301826, "timestamp": "2025-09-10 02:23:45.837780", "step": 3684, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:23:45.868244", "step": 3684, "epoch": 2 }, { "type": "loss", "content": 0.000742242147680372, "timestamp": "2025-09-10 02:23:45.873656", "step": 3685, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:23:45.903744", "step": 3685, "epoch": 2 }, { "type": "loss", "content": 0.007362429518252611, "timestamp": "2025-09-10 02:23:45.910834", "step": 3686, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:23:45.941967", "step": 3686, "epoch": 2 }, { "type": "loss", "content": 0.004590542521327734, "timestamp": "2025-09-10 02:23:45.954565", "step": 3687, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:23:45.987952", "step": 3687, "epoch": 2 }, { "type": "loss", "content": 0.010698004625737667, "timestamp": "2025-09-10 02:23:46.022265", "step": 3688, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:23:46.053140", "step": 3688, "epoch": 2 }, { "type": "loss", "content": 0.0017821193905547261, "timestamp": "2025-09-10 02:23:46.055294", "step": 3689, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:23:46.085878", "step": 3689, "epoch": 2 }, { "type": "loss", "content": 0.010044259950518608, "timestamp": "2025-09-10 02:23:46.097980", "step": 3690, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:23:46.129333", "step": 3690, "epoch": 2 }, { "type": "loss", "content": 0.0010982693638652563, "timestamp": "2025-09-10 02:23:46.140338", "step": 3691, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:23:46.175154", "step": 3691, "epoch": 2 }, { "type": "loss", "content": 0.005969376303255558, "timestamp": "2025-09-10 02:23:46.209750", "step": 3692, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:23:46.241338", "step": 3692, "epoch": 2 }, { "type": "loss", "content": 0.0015076607232913375, "timestamp": "2025-09-10 02:23:46.253943", "step": 3693, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:23:46.285232", "step": 3693, "epoch": 2 }, { "type": "loss", "content": 0.0018409850308671594, "timestamp": "2025-09-10 02:23:46.289800", "step": 3694, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:23:46.319885", "step": 3694, "epoch": 2 }, { "type": "loss", "content": 0.0012192835565656424, "timestamp": "2025-09-10 02:23:46.326508", "step": 3695, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:23:46.359303", "step": 3695, "epoch": 2 }, { "type": "loss", "content": 0.002559660468250513, "timestamp": "2025-09-10 02:23:46.386965", "step": 3696, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:23:46.426410", "step": 3696, "epoch": 2 }, { "type": "loss", "content": 0.0033039411064237356, "timestamp": "2025-09-10 02:23:46.436026", "step": 3697, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:23:46.472303", "step": 3697, "epoch": 2 }, { "type": "loss", "content": 0.00010749106149887666, "timestamp": "2025-09-10 02:23:46.482552", "step": 3698, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:23:46.513260", "step": 3698, "epoch": 2 }, { "type": "loss", "content": 0.025171758607029915, "timestamp": "2025-09-10 02:23:46.523468", "step": 3699, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:23:46.553613", "step": 3699, "epoch": 2 }, { "type": "loss", "content": 0.0031803150195628405, "timestamp": "2025-09-10 02:23:46.581832", "step": 3700, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:23:46.612784", "step": 3700, "epoch": 2 }, { "type": "loss", "content": 0.00467759370803833, "timestamp": "2025-09-10 02:23:46.622624", "step": 3701, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:23:46.652511", "step": 3701, "epoch": 2 }, { "type": "loss", "content": 0.0028708036988973618, "timestamp": "2025-09-10 02:23:46.657095", "step": 3702, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:23:46.688080", "step": 3702, "epoch": 2 }, { "type": "loss", "content": 0.0009561380720697343, "timestamp": "2025-09-10 02:23:46.699122", "step": 3703, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:23:46.730072", "step": 3703, "epoch": 2 }, { "type": "loss", "content": 0.001121461158618331, "timestamp": "2025-09-10 02:23:46.757739", "step": 3704, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:23:46.788304", "step": 3704, "epoch": 2 }, { "type": "loss", "content": 0.005151792895048857, "timestamp": "2025-09-10 02:23:46.793886", "step": 3705, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:23:46.825904", "step": 3705, "epoch": 2 }, { "type": "loss", "content": 0.00032375051523558795, "timestamp": "2025-09-10 02:23:46.835936", "step": 3706, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:23:46.867444", "step": 3706, "epoch": 2 }, { "type": "loss", "content": 0.005311047192662954, "timestamp": "2025-09-10 02:23:46.874324", "step": 3707, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:23:46.904803", "step": 3707, "epoch": 2 }, { "type": "loss", "content": 0.018711045384407043, "timestamp": "2025-09-10 02:23:46.938269", "step": 3708, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-10 02:23:46.970344", "step": 3708, "epoch": 2 }, { "type": "loss", "content": 0.00399330398067832, "timestamp": "2025-09-10 02:23:46.983435", "step": 3709, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:23:47.015316", "step": 3709, "epoch": 2 }, { "type": "loss", "content": 0.0006306317518465221, "timestamp": "2025-09-10 02:23:47.027878", "step": 3710, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:23:47.061711", "step": 3710, "epoch": 2 }, { "type": "loss", "content": 0.00018934406398329884, "timestamp": "2025-09-10 02:23:47.071989", "step": 3711, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:23:47.104274", "step": 3711, "epoch": 2 }, { "type": "loss", "content": 0.013638163916766644, "timestamp": "2025-09-10 02:23:47.128327", "step": 3712, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:23:47.162414", "step": 3712, "epoch": 2 }, { "type": "loss", "content": 0.031886231154203415, "timestamp": "2025-09-10 02:23:47.166905", "step": 3713, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-10 02:23:47.204158", "step": 3713, "epoch": 2 }, { "type": "loss", "content": 0.0013043539365753531, "timestamp": "2025-09-10 02:23:47.217908", "step": 3714, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:23:47.253639", "step": 3714, "epoch": 2 }, { "type": "loss", "content": 5.598477218882181e-05, "timestamp": "2025-09-10 02:23:47.257454", "step": 3715, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:23:47.294393", "step": 3715, "epoch": 2 }, { "type": "loss", "content": 0.00044489253195934, "timestamp": "2025-09-10 02:23:47.327746", "step": 3716, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:23:47.363529", "step": 3716, "epoch": 2 }, { "type": "loss", "content": 0.0010741885052993894, "timestamp": "2025-09-10 02:23:47.368139", "step": 3717, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:23:47.418380", "step": 3717, "epoch": 2 }, { "type": "loss", "content": 0.003896048292517662, "timestamp": "2025-09-10 02:23:47.428484", "step": 3718, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:23:47.470970", "step": 3718, "epoch": 2 }, { "type": "loss", "content": 0.002632845425978303, "timestamp": "2025-09-10 02:23:47.483553", "step": 3719, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:23:47.529925", "step": 3719, "epoch": 2 }, { "type": "loss", "content": 0.023609664291143417, "timestamp": "2025-09-10 02:23:47.564504", "step": 3720, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:23:47.605360", "step": 3720, "epoch": 2 }, { "type": "loss", "content": 0.018469881266355515, "timestamp": "2025-09-10 02:23:47.613216", "step": 3721, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-10 02:23:47.657401", "step": 3721, "epoch": 2 }, { "type": "loss", "content": 0.0002696176525205374, "timestamp": "2025-09-10 02:23:47.671202", "step": 3722, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:23:47.703080", "step": 3722, "epoch": 2 }, { "type": "loss", "content": 0.01182649191468954, "timestamp": "2025-09-10 02:23:47.710965", "step": 3723, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:23:47.741115", "step": 3723, "epoch": 2 }, { "type": "loss", "content": 0.0016434434801340103, "timestamp": "2025-09-10 02:23:47.769176", "step": 3724, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:23:47.801840", "step": 3724, "epoch": 2 }, { "type": "loss", "content": 0.009728246368467808, "timestamp": "2025-09-10 02:23:47.809393", "step": 3725, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:23:47.842116", "step": 3725, "epoch": 2 }, { "type": "loss", "content": 0.0011867971625179052, "timestamp": "2025-09-10 02:23:47.849032", "step": 3726, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:23:47.885747", "step": 3726, "epoch": 2 }, { "type": "loss", "content": 0.010371259413659573, "timestamp": "2025-09-10 02:23:47.899151", "step": 3727, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:23:47.932148", "step": 3727, "epoch": 2 }, { "type": "loss", "content": 0.00210155313834548, "timestamp": "2025-09-10 02:23:47.962867", "step": 3728, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:23:47.995654", "step": 3728, "epoch": 2 }, { "type": "loss", "content": 0.0019191886531189084, "timestamp": "2025-09-10 02:23:48.008286", "step": 3729, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:23:48.039506", "step": 3729, "epoch": 2 }, { "type": "loss", "content": 0.00013749170466326177, "timestamp": "2025-09-10 02:23:48.044059", "step": 3730, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:23:48.077114", "step": 3730, "epoch": 2 }, { "type": "loss", "content": 0.002893412485718727, "timestamp": "2025-09-10 02:23:48.084814", "step": 3731, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:23:48.117583", "step": 3731, "epoch": 2 }, { "type": "loss", "content": 0.0007469491683878005, "timestamp": "2025-09-10 02:23:48.146222", "step": 3732, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:23:48.178024", "step": 3732, "epoch": 2 }, { "type": "loss", "content": 0.0003932146355509758, "timestamp": "2025-09-10 02:23:48.186459", "step": 3733, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:23:48.218119", "step": 3733, "epoch": 2 }, { "type": "loss", "content": 8.557453838875517e-05, "timestamp": "2025-09-10 02:23:48.221036", "step": 3734, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:23:48.253673", "step": 3734, "epoch": 2 }, { "type": "loss", "content": 0.0002127924090018496, "timestamp": "2025-09-10 02:23:48.263724", "step": 3735, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:23:48.295365", "step": 3735, "epoch": 2 }, { "type": "loss", "content": 0.00040841306326910853, "timestamp": "2025-09-10 02:23:48.320578", "step": 3736, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 624 ], "flops": 18509808050496 }, "timestamp": "2025-09-10 02:23:48.369927", "step": 3736, "epoch": 2 }, { "type": "loss", "content": 0.0010468022665008903, "timestamp": "2025-09-10 02:23:48.391699", "step": 3737, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:23:48.421601", "step": 3737, "epoch": 2 }, { "type": "loss", "content": 0.0001885231613414362, "timestamp": "2025-09-10 02:23:48.428506", "step": 3738, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:23:48.458297", "step": 3738, "epoch": 2 }, { "type": "loss", "content": 3.569291584426537e-05, "timestamp": "2025-09-10 02:23:48.462466", "step": 3739, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 480 ], "flops": 14238374516352 }, "timestamp": "2025-09-10 02:23:48.505255", "step": 3739, "epoch": 2 }, { "type": "loss", "content": 0.003039777046069503, "timestamp": "2025-09-10 02:23:48.543505", "step": 3740, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:23:48.574502", "step": 3740, "epoch": 2 }, { "type": "loss", "content": 0.006935932207852602, "timestamp": "2025-09-10 02:23:48.584997", "step": 3741, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:23:48.619172", "step": 3741, "epoch": 2 }, { "type": "loss", "content": 0.0013960804790258408, "timestamp": "2025-09-10 02:23:48.626071", "step": 3742, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:23:48.660000", "step": 3742, "epoch": 2 }, { "type": "loss", "content": 0.0006007336778566241, "timestamp": "2025-09-10 02:23:48.664017", "step": 3743, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:23:48.695629", "step": 3743, "epoch": 2 }, { "type": "loss", "content": 0.013558273203670979, "timestamp": "2025-09-10 02:23:48.727061", "step": 3744, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:23:48.758708", "step": 3744, "epoch": 2 }, { "type": "loss", "content": 0.0007245481247082353, "timestamp": "2025-09-10 02:23:48.761187", "step": 3745, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:23:48.791751", "step": 3745, "epoch": 2 }, { "type": "loss", "content": 0.05813758820295334, "timestamp": "2025-09-10 02:23:48.798627", "step": 3746, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:23:48.839439", "step": 3746, "epoch": 2 }, { "type": "loss", "content": 0.0002415095950709656, "timestamp": "2025-09-10 02:23:48.845944", "step": 3747, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:23:48.877586", "step": 3747, "epoch": 2 }, { "type": "loss", "content": 0.000962139165494591, "timestamp": "2025-09-10 02:23:48.905331", "step": 3748, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:23:48.936723", "step": 3748, "epoch": 2 }, { "type": "loss", "content": 0.0019527226686477661, "timestamp": "2025-09-10 02:23:48.938587", "step": 3749, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:23:48.968844", "step": 3749, "epoch": 2 }, { "type": "loss", "content": 0.0019534730818122625, "timestamp": "2025-09-10 02:23:48.975883", "step": 3750, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:23:49.007630", "step": 3750, "epoch": 2 }, { "type": "loss", "content": 0.0006131255067884922, "timestamp": "2025-09-10 02:23:49.020206", "step": 3751, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-10 02:23:49.058993", "step": 3751, "epoch": 2 }, { "type": "loss", "content": 0.0003716005012392998, "timestamp": "2025-09-10 02:23:49.095806", "step": 3752, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:23:49.126990", "step": 3752, "epoch": 2 }, { "type": "loss", "content": 0.0004358034930191934, "timestamp": "2025-09-10 02:23:49.134452", "step": 3753, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:23:49.165481", "step": 3753, "epoch": 2 }, { "type": "loss", "content": 0.00044732578680850565, "timestamp": "2025-09-10 02:23:49.169363", "step": 3754, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:23:49.201372", "step": 3754, "epoch": 2 }, { "type": "loss", "content": 0.004059514496475458, "timestamp": "2025-09-10 02:23:49.208648", "step": 3755, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:23:49.240352", "step": 3755, "epoch": 2 }, { "type": "loss", "content": 0.000174950881046243, "timestamp": "2025-09-10 02:23:49.265504", "step": 3756, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:23:49.304855", "step": 3756, "epoch": 2 }, { "type": "loss", "content": 0.003618494840338826, "timestamp": "2025-09-10 02:23:49.307253", "step": 3757, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:23:49.338656", "step": 3757, "epoch": 2 }, { "type": "loss", "content": 0.000870470714289695, "timestamp": "2025-09-10 02:23:49.345475", "step": 3758, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:23:49.380400", "step": 3758, "epoch": 2 }, { "type": "loss", "content": 0.0002803669194690883, "timestamp": "2025-09-10 02:23:49.388113", "step": 3759, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:23:49.419299", "step": 3759, "epoch": 2 }, { "type": "loss", "content": 0.02521214261651039, "timestamp": "2025-09-10 02:23:49.449879", "step": 3760, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:23:49.480870", "step": 3760, "epoch": 2 }, { "type": "loss", "content": 0.03576240316033363, "timestamp": "2025-09-10 02:23:49.483063", "step": 3761, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 608 ], "flops": 18035204324480 }, "timestamp": "2025-09-10 02:23:49.534629", "step": 3761, "epoch": 2 }, { "type": "loss", "content": 0.043033067137002945, "timestamp": "2025-09-10 02:23:49.556117", "step": 3762, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:23:49.587429", "step": 3762, "epoch": 2 }, { "type": "loss", "content": 0.0002879269886761904, "timestamp": "2025-09-10 02:23:49.591819", "step": 3763, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:23:49.622714", "step": 3763, "epoch": 2 }, { "type": "loss", "content": 0.011223888956010342, "timestamp": "2025-09-10 02:23:49.647657", "step": 3764, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:23:49.678505", "step": 3764, "epoch": 2 }, { "type": "loss", "content": 0.0006490662926808, "timestamp": "2025-09-10 02:23:49.680403", "step": 3765, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:23:49.711153", "step": 3765, "epoch": 2 }, { "type": "loss", "content": 0.002436300739645958, "timestamp": "2025-09-10 02:23:49.718328", "step": 3766, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:23:49.749636", "step": 3766, "epoch": 2 }, { "type": "loss", "content": 0.03835447505116463, "timestamp": "2025-09-10 02:23:49.753874", "step": 3767, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:23:49.785074", "step": 3767, "epoch": 2 }, { "type": "loss", "content": 0.001985372742637992, "timestamp": "2025-09-10 02:23:49.813463", "step": 3768, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:23:49.845131", "step": 3768, "epoch": 2 }, { "type": "loss", "content": 0.005818284582346678, "timestamp": "2025-09-10 02:23:49.847905", "step": 3769, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:23:49.878162", "step": 3769, "epoch": 2 }, { "type": "loss", "content": 0.0013168009463697672, "timestamp": "2025-09-10 02:23:49.885062", "step": 3770, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:23:49.918288", "step": 3770, "epoch": 2 }, { "type": "loss", "content": 0.0015119427116587758, "timestamp": "2025-09-10 02:23:49.925676", "step": 3771, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:23:49.959836", "step": 3771, "epoch": 2 }, { "type": "loss", "content": 0.0005293331341817975, "timestamp": "2025-09-10 02:23:49.987984", "step": 3772, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:23:50.019957", "step": 3772, "epoch": 2 }, { "type": "loss", "content": 0.0002255245781270787, "timestamp": "2025-09-10 02:23:50.029337", "step": 3773, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:23:50.060821", "step": 3773, "epoch": 2 }, { "type": "loss", "content": 0.0010724698659032583, "timestamp": "2025-09-10 02:23:50.068255", "step": 3774, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:23:50.099297", "step": 3774, "epoch": 2 }, { "type": "loss", "content": 0.0030956987757235765, "timestamp": "2025-09-10 02:23:50.105997", "step": 3775, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:23:50.138577", "step": 3775, "epoch": 2 }, { "type": "loss", "content": 0.003467730712145567, "timestamp": "2025-09-10 02:23:50.170347", "step": 3776, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:23:50.201636", "step": 3776, "epoch": 2 }, { "type": "loss", "content": 0.0005490719340741634, "timestamp": "2025-09-10 02:23:50.206811", "step": 3777, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:23:50.239883", "step": 3777, "epoch": 2 }, { "type": "loss", "content": 0.0002172060776501894, "timestamp": "2025-09-10 02:23:50.247433", "step": 3778, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:23:50.278787", "step": 3778, "epoch": 2 }, { "type": "loss", "content": 0.002384532243013382, "timestamp": "2025-09-10 02:23:50.286019", "step": 3779, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:23:50.320002", "step": 3779, "epoch": 2 }, { "type": "loss", "content": 0.02834930457174778, "timestamp": "2025-09-10 02:23:50.347846", "step": 3780, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:23:50.378714", "step": 3780, "epoch": 2 }, { "type": "loss", "content": 0.006114445626735687, "timestamp": "2025-09-10 02:23:50.381081", "step": 3781, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:23:50.411840", "step": 3781, "epoch": 2 }, { "type": "loss", "content": 0.0004408101085573435, "timestamp": "2025-09-10 02:23:50.419203", "step": 3782, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:23:50.449842", "step": 3782, "epoch": 2 }, { "type": "loss", "content": 0.0024946555495262146, "timestamp": "2025-09-10 02:23:50.457195", "step": 3783, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:23:50.489297", "step": 3783, "epoch": 2 }, { "type": "loss", "content": 0.01270032487809658, "timestamp": "2025-09-10 02:23:50.521844", "step": 3784, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:23:50.553321", "step": 3784, "epoch": 2 }, { "type": "loss", "content": 0.0033720643259584904, "timestamp": "2025-09-10 02:23:50.558695", "step": 3785, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:23:50.589742", "step": 3785, "epoch": 2 }, { "type": "loss", "content": 0.05379801243543625, "timestamp": "2025-09-10 02:23:50.596659", "step": 3786, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:23:50.627235", "step": 3786, "epoch": 2 }, { "type": "loss", "content": 0.0017733937129378319, "timestamp": "2025-09-10 02:23:50.634247", "step": 3787, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 656 ], "flops": 19459015502528 }, "timestamp": "2025-09-10 02:23:50.689983", "step": 3787, "epoch": 2 }, { "type": "loss", "content": 0.0037569236010313034, "timestamp": "2025-09-10 02:23:50.734334", "step": 3788, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:23:50.765113", "step": 3788, "epoch": 2 }, { "type": "loss", "content": 0.0019782905001193285, "timestamp": "2025-09-10 02:23:50.769424", "step": 3789, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:23:50.800084", "step": 3789, "epoch": 2 }, { "type": "loss", "content": 0.013935952447354794, "timestamp": "2025-09-10 02:23:50.807050", "step": 3790, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:23:50.847666", "step": 3790, "epoch": 2 }, { "type": "loss", "content": 0.01583055406808853, "timestamp": "2025-09-10 02:23:50.852158", "step": 3791, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:23:50.882829", "step": 3791, "epoch": 2 }, { "type": "loss", "content": 0.0005747873219661415, "timestamp": "2025-09-10 02:23:50.914311", "step": 3792, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:23:50.947726", "step": 3792, "epoch": 2 }, { "type": "loss", "content": 0.0003728387819137424, "timestamp": "2025-09-10 02:23:50.955043", "step": 3793, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:23:50.985737", "step": 3793, "epoch": 2 }, { "type": "loss", "content": 0.0017032746691256762, "timestamp": "2025-09-10 02:23:50.989812", "step": 3794, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 144 ], "flops": 4271696270016 }, "timestamp": "2025-09-10 02:23:51.021167", "step": 3794, "epoch": 2 }, { "type": "loss", "content": 0.0027263087686151266, "timestamp": "2025-09-10 02:23:51.023598", "step": 3795, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:23:51.053872", "step": 3795, "epoch": 2 }, { "type": "loss", "content": 0.0019095286261290312, "timestamp": "2025-09-10 02:23:51.081640", "step": 3796, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:23:51.113023", "step": 3796, "epoch": 2 }, { "type": "loss", "content": 0.0055357408709824085, "timestamp": "2025-09-10 02:23:51.120842", "step": 3797, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:23:51.152210", "step": 3797, "epoch": 2 }, { "type": "loss", "content": 0.00228920322842896, "timestamp": "2025-09-10 02:23:51.162997", "step": 3798, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:23:51.193944", "step": 3798, "epoch": 2 }, { "type": "loss", "content": 0.0029537074733525515, "timestamp": "2025-09-10 02:23:51.200712", "step": 3799, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:23:51.231792", "step": 3799, "epoch": 2 }, { "type": "loss", "content": 0.009542775340378284, "timestamp": "2025-09-10 02:23:51.259863", "step": 3800, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:23:51.292129", "step": 3800, "epoch": 2 }, { "type": "loss", "content": 0.0010026551317423582, "timestamp": "2025-09-10 02:23:51.297291", "step": 3801, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:23:51.328429", "step": 3801, "epoch": 2 }, { "type": "loss", "content": 0.0001947238779393956, "timestamp": "2025-09-10 02:23:51.335239", "step": 3802, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:23:51.366633", "step": 3802, "epoch": 2 }, { "type": "loss", "content": 0.0014312856364995241, "timestamp": "2025-09-10 02:23:51.373403", "step": 3803, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:23:51.404802", "step": 3803, "epoch": 2 }, { "type": "loss", "content": 0.0010877539170905948, "timestamp": "2025-09-10 02:23:51.435519", "step": 3804, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-10 02:23:51.472136", "step": 3804, "epoch": 2 }, { "type": "loss", "content": 0.0061668092384934425, "timestamp": "2025-09-10 02:23:51.487525", "step": 3805, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:23:51.519845", "step": 3805, "epoch": 2 }, { "type": "loss", "content": 0.0013660427648574114, "timestamp": "2025-09-10 02:23:51.531967", "step": 3806, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:23:51.564750", "step": 3806, "epoch": 2 }, { "type": "loss", "content": 0.00044723015162162483, "timestamp": "2025-09-10 02:23:51.574188", "step": 3807, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:23:51.606066", "step": 3807, "epoch": 2 }, { "type": "loss", "content": 0.0004913516459055245, "timestamp": "2025-09-10 02:23:51.633627", "step": 3808, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:23:51.665052", "step": 3808, "epoch": 2 }, { "type": "loss", "content": 0.005763310939073563, "timestamp": "2025-09-10 02:23:51.672360", "step": 3809, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:23:51.704493", "step": 3809, "epoch": 2 }, { "type": "loss", "content": 0.0034396513365209103, "timestamp": "2025-09-10 02:23:51.711547", "step": 3810, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:23:51.742543", "step": 3810, "epoch": 2 }, { "type": "loss", "content": 0.012839804403483868, "timestamp": "2025-09-10 02:23:51.754889", "step": 3811, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:23:51.786891", "step": 3811, "epoch": 2 }, { "type": "loss", "content": 0.0012081711320206523, "timestamp": "2025-09-10 02:23:51.818243", "step": 3812, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:23:51.849541", "step": 3812, "epoch": 2 }, { "type": "loss", "content": 0.0012805964797735214, "timestamp": "2025-09-10 02:23:51.854395", "step": 3813, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:23:51.892405", "step": 3813, "epoch": 2 }, { "type": "loss", "content": 0.0016190716996788979, "timestamp": "2025-09-10 02:23:51.902950", "step": 3814, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:23:51.934567", "step": 3814, "epoch": 2 }, { "type": "loss", "content": 0.00160274060908705, "timestamp": "2025-09-10 02:23:51.941937", "step": 3815, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:23:51.973443", "step": 3815, "epoch": 2 }, { "type": "loss", "content": 0.007644836790859699, "timestamp": "2025-09-10 02:23:52.000824", "step": 3816, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:23:52.034481", "step": 3816, "epoch": 2 }, { "type": "loss", "content": 0.030178042128682137, "timestamp": "2025-09-10 02:23:52.037320", "step": 3817, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:23:52.069303", "step": 3817, "epoch": 2 }, { "type": "loss", "content": 0.00040458128205500543, "timestamp": "2025-09-10 02:23:52.076280", "step": 3818, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:23:52.107864", "step": 3818, "epoch": 2 }, { "type": "loss", "content": 0.021405117586255074, "timestamp": "2025-09-10 02:23:52.115515", "step": 3819, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:23:52.146252", "step": 3819, "epoch": 2 }, { "type": "loss", "content": 0.013643233105540276, "timestamp": "2025-09-10 02:23:52.177599", "step": 3820, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:23:52.209358", "step": 3820, "epoch": 2 }, { "type": "loss", "content": 0.004835444502532482, "timestamp": "2025-09-10 02:23:52.211571", "step": 3821, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:23:52.242137", "step": 3821, "epoch": 2 }, { "type": "loss", "content": 0.022363480180501938, "timestamp": "2025-09-10 02:23:52.246433", "step": 3822, "epoch": 2 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 736 ], "batch_size": 8, "flops": 14554433988352 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 480 ], "batch_size": 8, "flops": 9492022189824 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 544 ], "batch_size": 8, "flops": 10757625139456 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 624 ], "batch_size": 8, "flops": 12339628826496 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 576 ], "batch_size": 8, "flops": 11390426614272 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 432 ], "batch_size": 8, "flops": 8542819977600 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 432 ], "batch_size": 8, "flops": 8542819977600 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 128 ], "batch_size": 8, "flops": 2531205966848 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 544 ], "batch_size": 8, "flops": 10757625139456 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 656 ], "batch_size": 8, "flops": 12972430301312 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 128 ], "batch_size": 8, "flops": 2531205966848 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 464 ], "batch_size": 8, "flops": 9175621452416 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 560 ], "batch_size": 8, "flops": 11074025876864 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 576 ], "batch_size": 8, "flops": 11390426614272 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 1168 ], "batch_size": 8, "flops": 23097253898368 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 640 ], "batch_size": 8, "flops": 12656029563904 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 6, 208 ], "batch_size": 8, "flops": 4113209653888 } ], "timestamp": "2025-09-10 02:24:02.339761", "step": 3822, "epoch": 2 }, { "type": "pplx", "content": 19433033.667341556, "timestamp": "2025-09-10 02:24:02.342208", "step": 3822, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 480 ], "flops": 14238374516352 }, "timestamp": "2025-09-10 02:24:02.381828", "step": 3822, "epoch": 2 }, { "type": "loss", "content": 0.043930236250162125, "timestamp": "2025-09-10 02:24:02.399123", "step": 3823, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-10 02:24:02.438505", "step": 3823, "epoch": 2 }, { "type": "loss", "content": 0.004557534120976925, "timestamp": "2025-09-10 02:24:02.474955", "step": 3824, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:24:02.509196", "step": 3824, "epoch": 2 }, { "type": "loss", "content": 0.0007657821988686919, "timestamp": "2025-09-10 02:24:02.513420", "step": 3825, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:24:02.546830", "step": 3825, "epoch": 2 }, { "type": "loss", "content": 0.007892725057899952, "timestamp": "2025-09-10 02:24:02.558785", "step": 3826, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 464 ], "flops": 13763770790336 }, "timestamp": "2025-09-10 02:24:02.599847", "step": 3826, "epoch": 2 }, { "type": "loss", "content": 0.0026977970264852047, "timestamp": "2025-09-10 02:24:02.616867", "step": 3827, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:24:02.647816", "step": 3827, "epoch": 2 }, { "type": "loss", "content": 0.0012466126354411244, "timestamp": "2025-09-10 02:24:02.675701", "step": 3828, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 448 ], "flops": 13289167064320 }, "timestamp": "2025-09-10 02:24:02.712249", "step": 3828, "epoch": 2 }, { "type": "loss", "content": 0.0026414524763822556, "timestamp": "2025-09-10 02:24:02.728133", "step": 3829, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:24:02.761149", "step": 3829, "epoch": 2 }, { "type": "loss", "content": 0.000412534165661782, "timestamp": "2025-09-10 02:24:02.766841", "step": 3830, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:24:02.801406", "step": 3830, "epoch": 2 }, { "type": "loss", "content": 0.004566606599837542, "timestamp": "2025-09-10 02:24:02.810504", "step": 3831, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:24:02.842542", "step": 3831, "epoch": 2 }, { "type": "loss", "content": 0.003999842330813408, "timestamp": "2025-09-10 02:24:02.874692", "step": 3832, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:24:02.907490", "step": 3832, "epoch": 2 }, { "type": "loss", "content": 0.028634166345000267, "timestamp": "2025-09-10 02:24:02.915127", "step": 3833, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:24:02.947307", "step": 3833, "epoch": 2 }, { "type": "loss", "content": 0.010636513121426105, "timestamp": "2025-09-10 02:24:02.950689", "step": 3834, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:24:02.982711", "step": 3834, "epoch": 2 }, { "type": "loss", "content": 0.00048292818246409297, "timestamp": "2025-09-10 02:24:02.993519", "step": 3835, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:24:03.025164", "step": 3835, "epoch": 2 }, { "type": "loss", "content": 0.0005672777188010514, "timestamp": "2025-09-10 02:24:03.052537", "step": 3836, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:24:03.084203", "step": 3836, "epoch": 2 }, { "type": "loss", "content": 0.0003444700560066849, "timestamp": "2025-09-10 02:24:03.093537", "step": 3837, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:24:03.125051", "step": 3837, "epoch": 2 }, { "type": "loss", "content": 0.0015334226191043854, "timestamp": "2025-09-10 02:24:03.135462", "step": 3838, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:24:03.166961", "step": 3838, "epoch": 2 }, { "type": "loss", "content": 0.0006245824624784291, "timestamp": "2025-09-10 02:24:03.177542", "step": 3839, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:24:03.208216", "step": 3839, "epoch": 2 }, { "type": "loss", "content": 0.0002433412882965058, "timestamp": "2025-09-10 02:24:03.241567", "step": 3840, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:24:03.273090", "step": 3840, "epoch": 2 }, { "type": "loss", "content": 0.0006837646360509098, "timestamp": "2025-09-10 02:24:03.277420", "step": 3841, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:24:03.308079", "step": 3841, "epoch": 2 }, { "type": "loss", "content": 0.0020594163797795773, "timestamp": "2025-09-10 02:24:03.312065", "step": 3842, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:24:03.342985", "step": 3842, "epoch": 2 }, { "type": "loss", "content": 0.0023391323629766703, "timestamp": "2025-09-10 02:24:03.347097", "step": 3843, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:24:03.377572", "step": 3843, "epoch": 2 }, { "type": "loss", "content": 0.00048196568968705833, "timestamp": "2025-09-10 02:24:03.402366", "step": 3844, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:24:03.433467", "step": 3844, "epoch": 2 }, { "type": "loss", "content": 0.002800496993586421, "timestamp": "2025-09-10 02:24:03.435938", "step": 3845, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:24:03.466018", "step": 3845, "epoch": 2 }, { "type": "loss", "content": 0.0011212360113859177, "timestamp": "2025-09-10 02:24:03.469910", "step": 3846, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-10 02:24:03.503751", "step": 3846, "epoch": 2 }, { "type": "loss", "content": 0.0017739442409947515, "timestamp": "2025-09-10 02:24:03.517821", "step": 3847, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 144 ], "flops": 4271696270016 }, "timestamp": "2025-09-10 02:24:03.548066", "step": 3847, "epoch": 2 }, { "type": "loss", "content": 0.002876395359635353, "timestamp": "2025-09-10 02:24:03.571371", "step": 3848, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:24:03.601521", "step": 3848, "epoch": 2 }, { "type": "loss", "content": 0.0032014145981520414, "timestamp": "2025-09-10 02:24:03.605727", "step": 3849, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:24:03.636647", "step": 3849, "epoch": 2 }, { "type": "loss", "content": 0.001701483502984047, "timestamp": "2025-09-10 02:24:03.646311", "step": 3850, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:24:03.677513", "step": 3850, "epoch": 2 }, { "type": "loss", "content": 0.01332316268235445, "timestamp": "2025-09-10 02:24:03.684806", "step": 3851, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:24:03.715898", "step": 3851, "epoch": 2 }, { "type": "loss", "content": 0.010636200197041035, "timestamp": "2025-09-10 02:24:03.740412", "step": 3852, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:24:03.772090", "step": 3852, "epoch": 2 }, { "type": "loss", "content": 0.0018638168694451451, "timestamp": "2025-09-10 02:24:03.778728", "step": 3853, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:24:03.810931", "step": 3853, "epoch": 2 }, { "type": "loss", "content": 0.0011919804383069277, "timestamp": "2025-09-10 02:24:03.820628", "step": 3854, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:24:03.854201", "step": 3854, "epoch": 2 }, { "type": "loss", "content": 0.002363163512200117, "timestamp": "2025-09-10 02:24:03.860738", "step": 3855, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:24:03.892534", "step": 3855, "epoch": 2 }, { "type": "loss", "content": 0.0008571963990107179, "timestamp": "2025-09-10 02:24:03.916324", "step": 3856, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:24:03.947688", "step": 3856, "epoch": 2 }, { "type": "loss", "content": 0.001244921120814979, "timestamp": "2025-09-10 02:24:03.949591", "step": 3857, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:24:03.979801", "step": 3857, "epoch": 2 }, { "type": "loss", "content": 0.0029732866678386927, "timestamp": "2025-09-10 02:24:03.986458", "step": 3858, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:24:04.018877", "step": 3858, "epoch": 2 }, { "type": "loss", "content": 0.002771953120827675, "timestamp": "2025-09-10 02:24:04.028498", "step": 3859, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:24:04.060667", "step": 3859, "epoch": 2 }, { "type": "loss", "content": 0.005387973506003618, "timestamp": "2025-09-10 02:24:04.088671", "step": 3860, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 480 ], "flops": 14238374516352 }, "timestamp": "2025-09-10 02:24:04.127687", "step": 3860, "epoch": 2 }, { "type": "loss", "content": 0.001176676363684237, "timestamp": "2025-09-10 02:24:04.144668", "step": 3861, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:24:04.177813", "step": 3861, "epoch": 2 }, { "type": "loss", "content": 0.0007978384965099394, "timestamp": "2025-09-10 02:24:04.184388", "step": 3862, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:24:04.219122", "step": 3862, "epoch": 2 }, { "type": "loss", "content": 0.02228599414229393, "timestamp": "2025-09-10 02:24:04.226335", "step": 3863, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:24:04.258575", "step": 3863, "epoch": 2 }, { "type": "loss", "content": 0.003167739836499095, "timestamp": "2025-09-10 02:24:04.283761", "step": 3864, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 432 ], "flops": 12814563338304 }, "timestamp": "2025-09-10 02:24:04.321725", "step": 3864, "epoch": 2 }, { "type": "loss", "content": 0.0012020551366731524, "timestamp": "2025-09-10 02:24:04.337366", "step": 3865, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:24:04.372743", "step": 3865, "epoch": 2 }, { "type": "loss", "content": 0.0009655249887146056, "timestamp": "2025-09-10 02:24:04.383198", "step": 3866, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:24:04.423189", "step": 3866, "epoch": 2 }, { "type": "loss", "content": 0.0005044231074862182, "timestamp": "2025-09-10 02:24:04.430487", "step": 3867, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:24:04.468856", "step": 3867, "epoch": 2 }, { "type": "loss", "content": 0.0006265141419135034, "timestamp": "2025-09-10 02:24:04.496358", "step": 3868, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:24:04.535109", "step": 3868, "epoch": 2 }, { "type": "loss", "content": 0.0006974139832891524, "timestamp": "2025-09-10 02:24:04.539510", "step": 3869, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:24:04.576441", "step": 3869, "epoch": 2 }, { "type": "loss", "content": 0.0025091536808758974, "timestamp": "2025-09-10 02:24:04.583740", "step": 3870, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:24:04.616020", "step": 3870, "epoch": 2 }, { "type": "loss", "content": 0.002585696056485176, "timestamp": "2025-09-10 02:24:04.623367", "step": 3871, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:24:04.653411", "step": 3871, "epoch": 2 }, { "type": "loss", "content": 0.00017903503612615168, "timestamp": "2025-09-10 02:24:04.676942", "step": 3872, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:24:04.709069", "step": 3872, "epoch": 2 }, { "type": "loss", "content": 0.0017580740386620164, "timestamp": "2025-09-10 02:24:04.718216", "step": 3873, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:24:04.749475", "step": 3873, "epoch": 2 }, { "type": "loss", "content": 0.0017524746945127845, "timestamp": "2025-09-10 02:24:04.755997", "step": 3874, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:24:04.787618", "step": 3874, "epoch": 2 }, { "type": "loss", "content": 0.0003729330201167613, "timestamp": "2025-09-10 02:24:04.797160", "step": 3875, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:24:04.828740", "step": 3875, "epoch": 2 }, { "type": "loss", "content": 0.002224268391728401, "timestamp": "2025-09-10 02:24:04.861938", "step": 3876, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:24:04.893491", "step": 3876, "epoch": 2 }, { "type": "loss", "content": 0.0009001967846415937, "timestamp": "2025-09-10 02:24:04.897745", "step": 3877, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:24:04.930422", "step": 3877, "epoch": 2 }, { "type": "loss", "content": 0.0016282566357403994, "timestamp": "2025-09-10 02:24:04.937753", "step": 3878, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:24:04.969288", "step": 3878, "epoch": 2 }, { "type": "loss", "content": 0.040085725486278534, "timestamp": "2025-09-10 02:24:04.972968", "step": 3879, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:24:05.004246", "step": 3879, "epoch": 2 }, { "type": "loss", "content": 0.0017969904001802206, "timestamp": "2025-09-10 02:24:05.032418", "step": 3880, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:24:05.064709", "step": 3880, "epoch": 2 }, { "type": "loss", "content": 0.0019167568534612656, "timestamp": "2025-09-10 02:24:05.069317", "step": 3881, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:24:05.100668", "step": 3881, "epoch": 2 }, { "type": "loss", "content": 0.000999232055619359, "timestamp": "2025-09-10 02:24:05.108173", "step": 3882, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 144 ], "flops": 4271696270016 }, "timestamp": "2025-09-10 02:24:05.140099", "step": 3882, "epoch": 2 }, { "type": "loss", "content": 0.0007008261163718998, "timestamp": "2025-09-10 02:24:05.142449", "step": 3883, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-10 02:24:05.180595", "step": 3883, "epoch": 2 }, { "type": "loss", "content": 0.0016444515204057097, "timestamp": "2025-09-10 02:24:05.217354", "step": 3884, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:24:05.251315", "step": 3884, "epoch": 2 }, { "type": "loss", "content": 0.0006205525132827461, "timestamp": "2025-09-10 02:24:05.259496", "step": 3885, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:24:05.290788", "step": 3885, "epoch": 2 }, { "type": "loss", "content": 0.0010018015746027231, "timestamp": "2025-09-10 02:24:05.297304", "step": 3886, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:24:05.329551", "step": 3886, "epoch": 2 }, { "type": "loss", "content": 0.0008122866274788976, "timestamp": "2025-09-10 02:24:05.336204", "step": 3887, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:24:05.367674", "step": 3887, "epoch": 2 }, { "type": "loss", "content": 0.004226117394864559, "timestamp": "2025-09-10 02:24:05.398780", "step": 3888, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:24:05.431026", "step": 3888, "epoch": 2 }, { "type": "loss", "content": 0.002288134302943945, "timestamp": "2025-09-10 02:24:05.435599", "step": 3889, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 480 ], "flops": 14238374516352 }, "timestamp": "2025-09-10 02:24:05.477036", "step": 3889, "epoch": 2 }, { "type": "loss", "content": 0.027158772572875023, "timestamp": "2025-09-10 02:24:05.494244", "step": 3890, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:24:05.525912", "step": 3890, "epoch": 2 }, { "type": "loss", "content": 0.001486063003540039, "timestamp": "2025-09-10 02:24:05.535115", "step": 3891, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:24:05.566775", "step": 3891, "epoch": 2 }, { "type": "loss", "content": 0.0014154304517433047, "timestamp": "2025-09-10 02:24:05.591814", "step": 3892, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:24:05.623348", "step": 3892, "epoch": 2 }, { "type": "loss", "content": 0.015531855635344982, "timestamp": "2025-09-10 02:24:05.627610", "step": 3893, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:24:05.658895", "step": 3893, "epoch": 2 }, { "type": "loss", "content": 0.0005819514626637101, "timestamp": "2025-09-10 02:24:05.661365", "step": 3894, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:24:05.692262", "step": 3894, "epoch": 2 }, { "type": "loss", "content": 0.0014906743308529258, "timestamp": "2025-09-10 02:24:05.696070", "step": 3895, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:24:05.726485", "step": 3895, "epoch": 2 }, { "type": "loss", "content": 0.00021287697018124163, "timestamp": "2025-09-10 02:24:05.757225", "step": 3896, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:24:05.787897", "step": 3896, "epoch": 2 }, { "type": "loss", "content": 0.001206160755828023, "timestamp": "2025-09-10 02:24:05.793073", "step": 3897, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:24:05.824256", "step": 3897, "epoch": 2 }, { "type": "loss", "content": 0.0016391824465245008, "timestamp": "2025-09-10 02:24:05.826387", "step": 3898, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:24:05.857451", "step": 3898, "epoch": 2 }, { "type": "loss", "content": 0.0073780762031674385, "timestamp": "2025-09-10 02:24:05.868009", "step": 3899, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:24:05.902995", "step": 3899, "epoch": 2 }, { "type": "loss", "content": 0.0006778707611374557, "timestamp": "2025-09-10 02:24:05.930468", "step": 3900, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:24:05.961954", "step": 3900, "epoch": 2 }, { "type": "loss", "content": 0.0003114322025794536, "timestamp": "2025-09-10 02:24:05.969241", "step": 3901, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:24:06.002522", "step": 3901, "epoch": 2 }, { "type": "loss", "content": 0.0023112166672945023, "timestamp": "2025-09-10 02:24:06.016152", "step": 3902, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:24:06.047931", "step": 3902, "epoch": 2 }, { "type": "loss", "content": 0.0018408901523798704, "timestamp": "2025-09-10 02:24:06.054449", "step": 3903, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:24:06.085284", "step": 3903, "epoch": 2 }, { "type": "loss", "content": 0.0006988499662838876, "timestamp": "2025-09-10 02:24:06.110276", "step": 3904, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:24:06.142214", "step": 3904, "epoch": 2 }, { "type": "loss", "content": 0.0008732205023989081, "timestamp": "2025-09-10 02:24:06.149419", "step": 3905, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:24:06.180067", "step": 3905, "epoch": 2 }, { "type": "loss", "content": 0.0010088557610288262, "timestamp": "2025-09-10 02:24:06.187424", "step": 3906, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:24:06.219238", "step": 3906, "epoch": 2 }, { "type": "loss", "content": 0.0009297534124925733, "timestamp": "2025-09-10 02:24:06.223486", "step": 3907, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:24:06.254588", "step": 3907, "epoch": 2 }, { "type": "loss", "content": 0.002321895444765687, "timestamp": "2025-09-10 02:24:06.286023", "step": 3908, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:24:06.317271", "step": 3908, "epoch": 2 }, { "type": "loss", "content": 0.007969407364726067, "timestamp": "2025-09-10 02:24:06.319873", "step": 3909, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:24:06.350925", "step": 3909, "epoch": 2 }, { "type": "loss", "content": 0.0036575000267475843, "timestamp": "2025-09-10 02:24:06.357603", "step": 3910, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:24:06.389054", "step": 3910, "epoch": 2 }, { "type": "loss", "content": 0.004755291156470776, "timestamp": "2025-09-10 02:24:06.395946", "step": 3911, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:24:06.427525", "step": 3911, "epoch": 2 }, { "type": "loss", "content": 0.0007069699349813163, "timestamp": "2025-09-10 02:24:06.452159", "step": 3912, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:24:06.483179", "step": 3912, "epoch": 2 }, { "type": "loss", "content": 0.02136976644396782, "timestamp": "2025-09-10 02:24:06.493098", "step": 3913, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:24:06.523811", "step": 3913, "epoch": 2 }, { "type": "loss", "content": 0.01693398505449295, "timestamp": "2025-09-10 02:24:06.531364", "step": 3914, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:24:06.563605", "step": 3914, "epoch": 2 }, { "type": "loss", "content": 0.002733456203714013, "timestamp": "2025-09-10 02:24:06.570557", "step": 3915, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:24:06.601457", "step": 3915, "epoch": 2 }, { "type": "loss", "content": 0.010088739916682243, "timestamp": "2025-09-10 02:24:06.629030", "step": 3916, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:24:06.659845", "step": 3916, "epoch": 2 }, { "type": "loss", "content": 0.0005804885877296329, "timestamp": "2025-09-10 02:24:06.664255", "step": 3917, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:24:06.695717", "step": 3917, "epoch": 2 }, { "type": "loss", "content": 0.0004878589534200728, "timestamp": "2025-09-10 02:24:06.707563", "step": 3918, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:24:06.738941", "step": 3918, "epoch": 2 }, { "type": "loss", "content": 0.0038721126038581133, "timestamp": "2025-09-10 02:24:06.751502", "step": 3919, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:24:06.784020", "step": 3919, "epoch": 2 }, { "type": "loss", "content": 0.0008839900838211179, "timestamp": "2025-09-10 02:24:06.811653", "step": 3920, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:24:06.843427", "step": 3920, "epoch": 2 }, { "type": "loss", "content": 0.0010170344030484557, "timestamp": "2025-09-10 02:24:06.845414", "step": 3921, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:24:06.876709", "step": 3921, "epoch": 2 }, { "type": "loss", "content": 0.000293319666525349, "timestamp": "2025-09-10 02:24:06.880635", "step": 3922, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:24:06.911713", "step": 3922, "epoch": 2 }, { "type": "loss", "content": 0.001344728167168796, "timestamp": "2025-09-10 02:24:06.918603", "step": 3923, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:24:06.952454", "step": 3923, "epoch": 2 }, { "type": "loss", "content": 0.00218668463639915, "timestamp": "2025-09-10 02:24:06.983868", "step": 3924, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:24:07.014758", "step": 3924, "epoch": 2 }, { "type": "loss", "content": 0.008733495138585567, "timestamp": "2025-09-10 02:24:07.019956", "step": 3925, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:24:07.053619", "step": 3925, "epoch": 2 }, { "type": "loss", "content": 0.000639695324935019, "timestamp": "2025-09-10 02:24:07.067069", "step": 3926, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:24:07.098065", "step": 3926, "epoch": 2 }, { "type": "loss", "content": 0.0011399161303415895, "timestamp": "2025-09-10 02:24:07.100522", "step": 3927, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:24:07.131720", "step": 3927, "epoch": 2 }, { "type": "loss", "content": 0.0007331220549531281, "timestamp": "2025-09-10 02:24:07.160224", "step": 3928, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:24:07.192773", "step": 3928, "epoch": 2 }, { "type": "loss", "content": 0.00026727074873633683, "timestamp": "2025-09-10 02:24:07.205833", "step": 3929, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:24:07.237355", "step": 3929, "epoch": 2 }, { "type": "loss", "content": 0.0014935116050764918, "timestamp": "2025-09-10 02:24:07.249171", "step": 3930, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:24:07.279958", "step": 3930, "epoch": 2 }, { "type": "loss", "content": 0.0003637108893599361, "timestamp": "2025-09-10 02:24:07.284267", "step": 3931, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:24:07.315434", "step": 3931, "epoch": 2 }, { "type": "loss", "content": 0.00031519282492809, "timestamp": "2025-09-10 02:24:07.340156", "step": 3932, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:24:07.372778", "step": 3932, "epoch": 2 }, { "type": "loss", "content": 0.001294466550461948, "timestamp": "2025-09-10 02:24:07.376862", "step": 3933, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:24:07.408613", "step": 3933, "epoch": 2 }, { "type": "loss", "content": 0.0013076617615297437, "timestamp": "2025-09-10 02:24:07.412877", "step": 3934, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:24:07.443775", "step": 3934, "epoch": 2 }, { "type": "loss", "content": 0.0009365587611682713, "timestamp": "2025-09-10 02:24:07.450806", "step": 3935, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 144 ], "flops": 4271696270016 }, "timestamp": "2025-09-10 02:24:07.481976", "step": 3935, "epoch": 2 }, { "type": "loss", "content": 0.000639063015114516, "timestamp": "2025-09-10 02:24:07.506020", "step": 3936, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:24:07.536530", "step": 3936, "epoch": 2 }, { "type": "loss", "content": 0.008496752008795738, "timestamp": "2025-09-10 02:24:07.541316", "step": 3937, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:24:07.572193", "step": 3937, "epoch": 2 }, { "type": "loss", "content": 0.0006357203237712383, "timestamp": "2025-09-10 02:24:07.579907", "step": 3938, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:24:07.610407", "step": 3938, "epoch": 2 }, { "type": "loss", "content": 0.00021839377586729825, "timestamp": "2025-09-10 02:24:07.617289", "step": 3939, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:24:07.648094", "step": 3939, "epoch": 2 }, { "type": "loss", "content": 0.0003433347155805677, "timestamp": "2025-09-10 02:24:07.681085", "step": 3940, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:24:07.712244", "step": 3940, "epoch": 2 }, { "type": "loss", "content": 0.0004599998064804822, "timestamp": "2025-09-10 02:24:07.716927", "step": 3941, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:24:07.747344", "step": 3941, "epoch": 2 }, { "type": "loss", "content": 0.008438892662525177, "timestamp": "2025-09-10 02:24:07.758360", "step": 3942, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:24:07.789709", "step": 3942, "epoch": 2 }, { "type": "loss", "content": 0.0013779483269900084, "timestamp": "2025-09-10 02:24:07.802315", "step": 3943, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:24:07.832842", "step": 3943, "epoch": 2 }, { "type": "loss", "content": 0.0002914096985477954, "timestamp": "2025-09-10 02:24:07.860587", "step": 3944, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:24:07.894923", "step": 3944, "epoch": 2 }, { "type": "loss", "content": 0.0012282740790396929, "timestamp": "2025-09-10 02:24:07.902646", "step": 3945, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:24:07.932838", "step": 3945, "epoch": 2 }, { "type": "loss", "content": 0.0001276891416637227, "timestamp": "2025-09-10 02:24:07.940473", "step": 3946, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:24:07.970465", "step": 3946, "epoch": 2 }, { "type": "loss", "content": 0.006276755593717098, "timestamp": "2025-09-10 02:24:07.981506", "step": 3947, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:24:08.012106", "step": 3947, "epoch": 2 }, { "type": "loss", "content": 0.0029240294825285673, "timestamp": "2025-09-10 02:24:08.043281", "step": 3948, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:24:08.073563", "step": 3948, "epoch": 2 }, { "type": "loss", "content": 0.0004812986881006509, "timestamp": "2025-09-10 02:24:08.079004", "step": 3949, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 128 ], "flops": 3797092544000 }, "timestamp": "2025-09-10 02:24:08.108621", "step": 3949, "epoch": 2 }, { "type": "loss", "content": 0.00047114197514019907, "timestamp": "2025-09-10 02:24:08.111228", "step": 3950, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:24:08.141972", "step": 3950, "epoch": 2 }, { "type": "loss", "content": 0.0006882947636768222, "timestamp": "2025-09-10 02:24:08.154144", "step": 3951, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:24:08.184207", "step": 3951, "epoch": 2 }, { "type": "loss", "content": 0.003981906455010176, "timestamp": "2025-09-10 02:24:08.212332", "step": 3952, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:24:08.243261", "step": 3952, "epoch": 2 }, { "type": "loss", "content": 0.0001539530057925731, "timestamp": "2025-09-10 02:24:08.245687", "step": 3953, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:24:08.276533", "step": 3953, "epoch": 2 }, { "type": "loss", "content": 0.001202300889417529, "timestamp": "2025-09-10 02:24:08.286904", "step": 3954, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:24:08.317017", "step": 3954, "epoch": 2 }, { "type": "loss", "content": 0.0010202035773545504, "timestamp": "2025-09-10 02:24:08.327013", "step": 3955, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:24:08.357903", "step": 3955, "epoch": 2 }, { "type": "loss", "content": 0.005083515774458647, "timestamp": "2025-09-10 02:24:08.386110", "step": 3956, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:24:08.416923", "step": 3956, "epoch": 2 }, { "type": "loss", "content": 0.00014192526577971876, "timestamp": "2025-09-10 02:24:08.429592", "step": 3957, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:24:08.458783", "step": 3957, "epoch": 2 }, { "type": "loss", "content": 0.0236373208463192, "timestamp": "2025-09-10 02:24:08.463031", "step": 3958, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:24:08.493098", "step": 3958, "epoch": 2 }, { "type": "loss", "content": 0.004410702269524336, "timestamp": "2025-09-10 02:24:08.505336", "step": 3959, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:24:08.536639", "step": 3959, "epoch": 2 }, { "type": "loss", "content": 0.0011517350794747472, "timestamp": "2025-09-10 02:24:08.565282", "step": 3960, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:24:08.596232", "step": 3960, "epoch": 2 }, { "type": "loss", "content": 0.0006834971136413515, "timestamp": "2025-09-10 02:24:08.606704", "step": 3961, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:24:08.637610", "step": 3961, "epoch": 2 }, { "type": "loss", "content": 0.00012764804705511779, "timestamp": "2025-09-10 02:24:08.644426", "step": 3962, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:24:08.678508", "step": 3962, "epoch": 2 }, { "type": "loss", "content": 0.0006874548853375018, "timestamp": "2025-09-10 02:24:08.685403", "step": 3963, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:24:08.716274", "step": 3963, "epoch": 2 }, { "type": "loss", "content": 0.0035995282232761383, "timestamp": "2025-09-10 02:24:08.744389", "step": 3964, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:24:08.777692", "step": 3964, "epoch": 2 }, { "type": "loss", "content": 0.014470146968960762, "timestamp": "2025-09-10 02:24:08.782058", "step": 3965, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:24:08.813486", "step": 3965, "epoch": 2 }, { "type": "loss", "content": 0.019030440598726273, "timestamp": "2025-09-10 02:24:08.817956", "step": 3966, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:24:08.848437", "step": 3966, "epoch": 2 }, { "type": "loss", "content": 0.0006014609825797379, "timestamp": "2025-09-10 02:24:08.855440", "step": 3967, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:24:08.887013", "step": 3967, "epoch": 2 }, { "type": "loss", "content": 0.0009079644805751741, "timestamp": "2025-09-10 02:24:08.915256", "step": 3968, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:24:08.946655", "step": 3968, "epoch": 2 }, { "type": "loss", "content": 0.0007088962593115866, "timestamp": "2025-09-10 02:24:08.951854", "step": 3969, "epoch": 2 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 736 ], "batch_size": 8, "flops": 14554433988352 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 480 ], "batch_size": 8, "flops": 9492022189824 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 544 ], "batch_size": 8, "flops": 10757625139456 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 624 ], "batch_size": 8, "flops": 12339628826496 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 576 ], "batch_size": 8, "flops": 11390426614272 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 432 ], "batch_size": 8, "flops": 8542819977600 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 432 ], "batch_size": 8, "flops": 8542819977600 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 128 ], "batch_size": 8, "flops": 2531205966848 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 544 ], "batch_size": 8, "flops": 10757625139456 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 656 ], "batch_size": 8, "flops": 12972430301312 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 128 ], "batch_size": 8, "flops": 2531205966848 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 464 ], "batch_size": 8, "flops": 9175621452416 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 560 ], "batch_size": 8, "flops": 11074025876864 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 576 ], "batch_size": 8, "flops": 11390426614272 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 1168 ], "batch_size": 8, "flops": 23097253898368 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 640 ], "batch_size": 8, "flops": 12656029563904 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 6, 208 ], "batch_size": 8, "flops": 4113209653888 } ], "timestamp": "2025-09-10 02:24:19.403820", "step": 3969, "epoch": 2 }, { "type": "pplx", "content": 22799844.439538065, "timestamp": "2025-09-10 02:24:19.408402", "step": 3969, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:24:19.442139", "step": 3969, "epoch": 2 }, { "type": "loss", "content": 0.0003047510690521449, "timestamp": "2025-09-10 02:24:19.455828", "step": 3970, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-10 02:24:19.495658", "step": 3970, "epoch": 2 }, { "type": "loss", "content": 0.005057158879935741, "timestamp": "2025-09-10 02:24:19.511583", "step": 3971, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:24:19.544202", "step": 3971, "epoch": 2 }, { "type": "loss", "content": 0.008785208687186241, "timestamp": "2025-09-10 02:24:19.572213", "step": 3972, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:24:19.610162", "step": 3972, "epoch": 2 }, { "type": "loss", "content": 0.006338917650282383, "timestamp": "2025-09-10 02:24:19.617256", "step": 3973, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:24:19.648959", "step": 3973, "epoch": 2 }, { "type": "loss", "content": 0.0006893486715853214, "timestamp": "2025-09-10 02:24:19.655825", "step": 3974, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:24:19.692474", "step": 3974, "epoch": 2 }, { "type": "loss", "content": 0.0003333700296934694, "timestamp": "2025-09-10 02:24:19.697017", "step": 3975, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:24:19.727698", "step": 3975, "epoch": 2 }, { "type": "loss", "content": 8.453882037429139e-05, "timestamp": "2025-09-10 02:24:19.760764", "step": 3976, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:24:19.791106", "step": 3976, "epoch": 2 }, { "type": "loss", "content": 0.000572329037822783, "timestamp": "2025-09-10 02:24:19.795706", "step": 3977, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:24:19.826261", "step": 3977, "epoch": 2 }, { "type": "loss", "content": 0.02135993354022503, "timestamp": "2025-09-10 02:24:19.834036", "step": 3978, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:24:19.864596", "step": 3978, "epoch": 2 }, { "type": "loss", "content": 0.009581172838807106, "timestamp": "2025-09-10 02:24:19.877140", "step": 3979, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:24:19.907721", "step": 3979, "epoch": 2 }, { "type": "loss", "content": 0.009216717444360256, "timestamp": "2025-09-10 02:24:19.936432", "step": 3980, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:24:19.965471", "step": 3980, "epoch": 2 }, { "type": "loss", "content": 0.0003136309387627989, "timestamp": "2025-09-10 02:24:19.970967", "step": 3981, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:24:20.001248", "step": 3981, "epoch": 2 }, { "type": "loss", "content": 0.002166020916774869, "timestamp": "2025-09-10 02:24:20.008799", "step": 3982, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:24:20.044066", "step": 3982, "epoch": 2 }, { "type": "loss", "content": 0.0010525870602577925, "timestamp": "2025-09-10 02:24:20.056313", "step": 3983, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:24:20.086574", "step": 3983, "epoch": 2 }, { "type": "loss", "content": 0.0007074028253555298, "timestamp": "2025-09-10 02:24:20.114464", "step": 3984, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:24:20.149163", "step": 3984, "epoch": 2 }, { "type": "loss", "content": 0.017534593120217323, "timestamp": "2025-09-10 02:24:20.151241", "step": 3985, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:24:20.182337", "step": 3985, "epoch": 2 }, { "type": "loss", "content": 0.0005388972931541502, "timestamp": "2025-09-10 02:24:20.189384", "step": 3986, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:24:20.219925", "step": 3986, "epoch": 2 }, { "type": "loss", "content": 0.020015867426991463, "timestamp": "2025-09-10 02:24:20.230202", "step": 3987, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:24:20.265735", "step": 3987, "epoch": 2 }, { "type": "loss", "content": 0.0004713798116426915, "timestamp": "2025-09-10 02:24:20.290709", "step": 3988, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:24:20.323084", "step": 3988, "epoch": 2 }, { "type": "loss", "content": 0.0058451988734304905, "timestamp": "2025-09-10 02:24:20.330695", "step": 3989, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:24:20.363663", "step": 3989, "epoch": 2 }, { "type": "loss", "content": 0.026393314823508263, "timestamp": "2025-09-10 02:24:20.370585", "step": 3990, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:24:20.413538", "step": 3990, "epoch": 2 }, { "type": "loss", "content": 0.0007539827493019402, "timestamp": "2025-09-10 02:24:20.423882", "step": 3991, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:24:20.470114", "step": 3991, "epoch": 2 }, { "type": "loss", "content": 0.013442503288388252, "timestamp": "2025-09-10 02:24:20.495116", "step": 3992, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:24:20.526812", "step": 3992, "epoch": 2 }, { "type": "loss", "content": 0.0014537216629832983, "timestamp": "2025-09-10 02:24:20.529200", "step": 3993, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:24:20.560254", "step": 3993, "epoch": 2 }, { "type": "loss", "content": 0.0008240799652412534, "timestamp": "2025-09-10 02:24:20.568128", "step": 3994, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:24:20.599170", "step": 3994, "epoch": 2 }, { "type": "loss", "content": 0.0015933796530589461, "timestamp": "2025-09-10 02:24:20.603231", "step": 3995, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:24:20.633342", "step": 3995, "epoch": 2 }, { "type": "loss", "content": 0.03594691678881645, "timestamp": "2025-09-10 02:24:20.658890", "step": 3996, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:24:20.689281", "step": 3996, "epoch": 2 }, { "type": "loss", "content": 0.000319391256198287, "timestamp": "2025-09-10 02:24:20.694555", "step": 3997, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:24:20.724910", "step": 3997, "epoch": 2 }, { "type": "loss", "content": 0.0003305670979898423, "timestamp": "2025-09-10 02:24:20.735146", "step": 3998, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:24:20.764227", "step": 3998, "epoch": 2 }, { "type": "loss", "content": 8.796909969532862e-05, "timestamp": "2025-09-10 02:24:20.771237", "step": 3999, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:24:20.800926", "step": 3999, "epoch": 2 }, { "type": "loss", "content": 0.00020751934789586812, "timestamp": "2025-09-10 02:24:20.833887", "step": 4000, "epoch": 2 }, { "type": "info", "content": "Checkpoint saved at step 4000", "timestamp": "2025-09-10 02:24:25.468790", "step": 4000, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:24:25.512847", "step": 4000, "epoch": 2 }, { "type": "loss", "content": 0.0006269075674936175, "timestamp": "2025-09-10 02:24:25.516078", "step": 4001, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:24:25.549501", "step": 4001, "epoch": 2 }, { "type": "loss", "content": 0.00019980034267064184, "timestamp": "2025-09-10 02:24:25.555522", "step": 4002, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:24:25.589132", "step": 4002, "epoch": 2 }, { "type": "loss", "content": 0.003651339327916503, "timestamp": "2025-09-10 02:24:25.595903", "step": 4003, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:24:25.628240", "step": 4003, "epoch": 2 }, { "type": "loss", "content": 0.0007770135416649282, "timestamp": "2025-09-10 02:24:25.656161", "step": 4004, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:24:25.690389", "step": 4004, "epoch": 2 }, { "type": "loss", "content": 0.0008123559528030455, "timestamp": "2025-09-10 02:24:25.703360", "step": 4005, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:24:25.737137", "step": 4005, "epoch": 2 }, { "type": "loss", "content": 0.01008316408842802, "timestamp": "2025-09-10 02:24:25.741004", "step": 4006, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:24:25.773651", "step": 4006, "epoch": 2 }, { "type": "loss", "content": 0.004857528023421764, "timestamp": "2025-09-10 02:24:25.781095", "step": 4007, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:24:25.814355", "step": 4007, "epoch": 2 }, { "type": "loss", "content": 0.0003992785350419581, "timestamp": "2025-09-10 02:24:25.842407", "step": 4008, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:24:25.876193", "step": 4008, "epoch": 2 }, { "type": "loss", "content": 0.000981758115813136, "timestamp": "2025-09-10 02:24:25.881199", "step": 4009, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-10 02:24:25.921274", "step": 4009, "epoch": 2 }, { "type": "loss", "content": 0.0010118504287675023, "timestamp": "2025-09-10 02:24:25.936833", "step": 4010, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:24:25.979672", "step": 4010, "epoch": 2 }, { "type": "loss", "content": 0.0013079562922939658, "timestamp": "2025-09-10 02:24:25.986995", "step": 4011, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:24:26.022946", "step": 4011, "epoch": 2 }, { "type": "loss", "content": 0.016620881855487823, "timestamp": "2025-09-10 02:24:26.050856", "step": 4012, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:24:26.083657", "step": 4012, "epoch": 2 }, { "type": "loss", "content": 0.05415716394782066, "timestamp": "2025-09-10 02:24:26.087661", "step": 4013, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-10 02:24:26.128422", "step": 4013, "epoch": 2 }, { "type": "loss", "content": 0.0009211709839291871, "timestamp": "2025-09-10 02:24:26.144033", "step": 4014, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 464 ], "flops": 13763770790336 }, "timestamp": "2025-09-10 02:24:26.187373", "step": 4014, "epoch": 2 }, { "type": "loss", "content": 0.0036754843313246965, "timestamp": "2025-09-10 02:24:26.204433", "step": 4015, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:24:26.238695", "step": 4015, "epoch": 2 }, { "type": "loss", "content": 0.0007402479532174766, "timestamp": "2025-09-10 02:24:26.267096", "step": 4016, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:24:26.298194", "step": 4016, "epoch": 2 }, { "type": "loss", "content": 0.0003799795522354543, "timestamp": "2025-09-10 02:24:26.302393", "step": 4017, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:24:26.335985", "step": 4017, "epoch": 2 }, { "type": "loss", "content": 0.0001108443975681439, "timestamp": "2025-09-10 02:24:26.348368", "step": 4018, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:24:26.385532", "step": 4018, "epoch": 2 }, { "type": "loss", "content": 0.007017719559371471, "timestamp": "2025-09-10 02:24:26.398073", "step": 4019, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 496 ], "flops": 14712978242368 }, "timestamp": "2025-09-10 02:24:26.444594", "step": 4019, "epoch": 2 }, { "type": "loss", "content": 0.021222028881311417, "timestamp": "2025-09-10 02:24:26.483027", "step": 4020, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:24:26.516785", "step": 4020, "epoch": 2 }, { "type": "loss", "content": 0.0004138918302487582, "timestamp": "2025-09-10 02:24:26.526789", "step": 4021, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:24:26.559474", "step": 4021, "epoch": 2 }, { "type": "loss", "content": 0.011343798600137234, "timestamp": "2025-09-10 02:24:26.566101", "step": 4022, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:24:26.600887", "step": 4022, "epoch": 2 }, { "type": "loss", "content": 0.007483073975890875, "timestamp": "2025-09-10 02:24:26.604769", "step": 4023, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:24:26.638117", "step": 4023, "epoch": 2 }, { "type": "loss", "content": 0.003005236154422164, "timestamp": "2025-09-10 02:24:26.662996", "step": 4024, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:24:26.695391", "step": 4024, "epoch": 2 }, { "type": "loss", "content": 0.001247288309969008, "timestamp": "2025-09-10 02:24:26.705059", "step": 4025, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:24:26.738860", "step": 4025, "epoch": 2 }, { "type": "loss", "content": 0.006834504660218954, "timestamp": "2025-09-10 02:24:26.749292", "step": 4026, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:24:26.785078", "step": 4026, "epoch": 2 }, { "type": "loss", "content": 0.013955286704003811, "timestamp": "2025-09-10 02:24:26.791860", "step": 4027, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:24:26.825667", "step": 4027, "epoch": 2 }, { "type": "loss", "content": 0.0021858804393559694, "timestamp": "2025-09-10 02:24:26.850910", "step": 4028, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:24:26.883376", "step": 4028, "epoch": 2 }, { "type": "loss", "content": 0.00010486682003829628, "timestamp": "2025-09-10 02:24:26.888564", "step": 4029, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:24:26.924331", "step": 4029, "epoch": 2 }, { "type": "loss", "content": 0.00023303573834709823, "timestamp": "2025-09-10 02:24:26.931664", "step": 4030, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:24:26.971160", "step": 4030, "epoch": 2 }, { "type": "loss", "content": 0.00035887552076019347, "timestamp": "2025-09-10 02:24:26.981146", "step": 4031, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:24:27.012992", "step": 4031, "epoch": 2 }, { "type": "loss", "content": 0.0004720363358501345, "timestamp": "2025-09-10 02:24:27.040899", "step": 4032, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:24:27.074417", "step": 4032, "epoch": 2 }, { "type": "loss", "content": 0.031031426042318344, "timestamp": "2025-09-10 02:24:27.083836", "step": 4033, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:24:27.116891", "step": 4033, "epoch": 2 }, { "type": "loss", "content": 0.0011731393169611692, "timestamp": "2025-09-10 02:24:27.123439", "step": 4034, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:24:27.156461", "step": 4034, "epoch": 2 }, { "type": "loss", "content": 0.0004593496269080788, "timestamp": "2025-09-10 02:24:27.158888", "step": 4035, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:24:27.194989", "step": 4035, "epoch": 2 }, { "type": "loss", "content": 0.005191961769014597, "timestamp": "2025-09-10 02:24:27.226408", "step": 4036, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 432 ], "flops": 12814563338304 }, "timestamp": "2025-09-10 02:24:27.267895", "step": 4036, "epoch": 2 }, { "type": "loss", "content": 0.0007114148465916514, "timestamp": "2025-09-10 02:24:27.283581", "step": 4037, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:24:27.317423", "step": 4037, "epoch": 2 }, { "type": "loss", "content": 0.00016225686704274267, "timestamp": "2025-09-10 02:24:27.324612", "step": 4038, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-10 02:24:27.362907", "step": 4038, "epoch": 2 }, { "type": "loss", "content": 0.00895176362246275, "timestamp": "2025-09-10 02:24:27.376886", "step": 4039, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:24:27.413372", "step": 4039, "epoch": 2 }, { "type": "loss", "content": 0.0012197830947116017, "timestamp": "2025-09-10 02:24:27.438493", "step": 4040, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-10 02:24:27.475483", "step": 4040, "epoch": 2 }, { "type": "loss", "content": 0.00033447827445343137, "timestamp": "2025-09-10 02:24:27.488606", "step": 4041, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:24:27.521034", "step": 4041, "epoch": 2 }, { "type": "loss", "content": 0.001729366136714816, "timestamp": "2025-09-10 02:24:27.527787", "step": 4042, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:24:27.560417", "step": 4042, "epoch": 2 }, { "type": "loss", "content": 0.01877027377486229, "timestamp": "2025-09-10 02:24:27.567838", "step": 4043, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:24:27.600247", "step": 4043, "epoch": 2 }, { "type": "loss", "content": 0.0001404429494868964, "timestamp": "2025-09-10 02:24:27.627831", "step": 4044, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:24:27.663723", "step": 4044, "epoch": 2 }, { "type": "loss", "content": 0.027306651696562767, "timestamp": "2025-09-10 02:24:27.675805", "step": 4045, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:24:27.711995", "step": 4045, "epoch": 2 }, { "type": "loss", "content": 0.00820981990545988, "timestamp": "2025-09-10 02:24:27.723866", "step": 4046, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:24:27.757447", "step": 4046, "epoch": 2 }, { "type": "loss", "content": 0.00019994494505226612, "timestamp": "2025-09-10 02:24:27.767386", "step": 4047, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:24:27.800142", "step": 4047, "epoch": 2 }, { "type": "loss", "content": 0.0016157986829057336, "timestamp": "2025-09-10 02:24:27.825392", "step": 4048, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:24:27.857196", "step": 4048, "epoch": 2 }, { "type": "loss", "content": 0.0032153644133359194, "timestamp": "2025-09-10 02:24:27.862081", "step": 4049, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:24:27.895807", "step": 4049, "epoch": 2 }, { "type": "loss", "content": 0.0009131658589467406, "timestamp": "2025-09-10 02:24:27.902771", "step": 4050, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:24:27.938617", "step": 4050, "epoch": 2 }, { "type": "loss", "content": 0.002615003613755107, "timestamp": "2025-09-10 02:24:27.941114", "step": 4051, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:24:27.972303", "step": 4051, "epoch": 2 }, { "type": "loss", "content": 0.0024026173632591963, "timestamp": "2025-09-10 02:24:28.003897", "step": 4052, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:24:28.037982", "step": 4052, "epoch": 2 }, { "type": "loss", "content": 0.0002667165535967797, "timestamp": "2025-09-10 02:24:28.045276", "step": 4053, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:24:28.078604", "step": 4053, "epoch": 2 }, { "type": "loss", "content": 0.0019048672402277589, "timestamp": "2025-09-10 02:24:28.090526", "step": 4054, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:24:28.122157", "step": 4054, "epoch": 2 }, { "type": "loss", "content": 0.0007782382308505476, "timestamp": "2025-09-10 02:24:28.129961", "step": 4055, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:24:28.166964", "step": 4055, "epoch": 2 }, { "type": "loss", "content": 0.01114829070866108, "timestamp": "2025-09-10 02:24:28.201501", "step": 4056, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-10 02:24:28.245463", "step": 4056, "epoch": 2 }, { "type": "loss", "content": 0.0023074380587786436, "timestamp": "2025-09-10 02:24:28.260862", "step": 4057, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:24:28.293857", "step": 4057, "epoch": 2 }, { "type": "loss", "content": 0.002955834148451686, "timestamp": "2025-09-10 02:24:28.301123", "step": 4058, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:24:28.333365", "step": 4058, "epoch": 2 }, { "type": "loss", "content": 0.003895343979820609, "timestamp": "2025-09-10 02:24:28.337305", "step": 4059, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:24:28.373184", "step": 4059, "epoch": 2 }, { "type": "loss", "content": 0.008619307540357113, "timestamp": "2025-09-10 02:24:28.407381", "step": 4060, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:24:28.441969", "step": 4060, "epoch": 2 }, { "type": "loss", "content": 0.016522839665412903, "timestamp": "2025-09-10 02:24:28.449041", "step": 4061, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:24:28.483243", "step": 4061, "epoch": 2 }, { "type": "loss", "content": 0.0006908263312652707, "timestamp": "2025-09-10 02:24:28.492993", "step": 4062, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:24:28.527778", "step": 4062, "epoch": 2 }, { "type": "loss", "content": 0.0022160657681524754, "timestamp": "2025-09-10 02:24:28.534885", "step": 4063, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:24:28.567164", "step": 4063, "epoch": 2 }, { "type": "loss", "content": 0.00819767639040947, "timestamp": "2025-09-10 02:24:28.594498", "step": 4064, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:24:28.629854", "step": 4064, "epoch": 2 }, { "type": "loss", "content": 0.0025234988424926996, "timestamp": "2025-09-10 02:24:28.639696", "step": 4065, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:24:28.671547", "step": 4065, "epoch": 2 }, { "type": "loss", "content": 0.0017039499944075942, "timestamp": "2025-09-10 02:24:28.678953", "step": 4066, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:24:28.709858", "step": 4066, "epoch": 2 }, { "type": "loss", "content": 0.0045456611551344395, "timestamp": "2025-09-10 02:24:28.720134", "step": 4067, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:24:28.752404", "step": 4067, "epoch": 2 }, { "type": "loss", "content": 0.05136652663350105, "timestamp": "2025-09-10 02:24:28.780690", "step": 4068, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:24:28.811401", "step": 4068, "epoch": 2 }, { "type": "loss", "content": 0.0038302938919514418, "timestamp": "2025-09-10 02:24:28.816041", "step": 4069, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:24:28.851839", "step": 4069, "epoch": 2 }, { "type": "loss", "content": 0.0043130056001245975, "timestamp": "2025-09-10 02:24:28.865507", "step": 4070, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:24:28.896166", "step": 4070, "epoch": 2 }, { "type": "loss", "content": 0.004200483672320843, "timestamp": "2025-09-10 02:24:28.903238", "step": 4071, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:24:28.935274", "step": 4071, "epoch": 2 }, { "type": "loss", "content": 0.0009043649188242853, "timestamp": "2025-09-10 02:24:28.959569", "step": 4072, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:24:28.991351", "step": 4072, "epoch": 2 }, { "type": "loss", "content": 0.001501325867138803, "timestamp": "2025-09-10 02:24:28.996915", "step": 4073, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:24:29.028148", "step": 4073, "epoch": 2 }, { "type": "loss", "content": 0.0002941501443274319, "timestamp": "2025-09-10 02:24:29.030693", "step": 4074, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 528 ], "flops": 15662185694400 }, "timestamp": "2025-09-10 02:24:29.076492", "step": 4074, "epoch": 2 }, { "type": "loss", "content": 0.001308751991018653, "timestamp": "2025-09-10 02:24:29.095675", "step": 4075, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:24:29.125589", "step": 4075, "epoch": 2 }, { "type": "loss", "content": 0.0014235320268198848, "timestamp": "2025-09-10 02:24:29.153553", "step": 4076, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:24:29.186944", "step": 4076, "epoch": 2 }, { "type": "loss", "content": 5.163023524801247e-05, "timestamp": "2025-09-10 02:24:29.197445", "step": 4077, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 128 ], "flops": 3797092544000 }, "timestamp": "2025-09-10 02:24:29.229513", "step": 4077, "epoch": 2 }, { "type": "loss", "content": 0.0026386440731585026, "timestamp": "2025-09-10 02:24:29.231295", "step": 4078, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:24:29.262111", "step": 4078, "epoch": 2 }, { "type": "loss", "content": 0.0006315871723927557, "timestamp": "2025-09-10 02:24:29.272626", "step": 4079, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:24:29.304174", "step": 4079, "epoch": 2 }, { "type": "loss", "content": 0.0019018551101908088, "timestamp": "2025-09-10 02:24:29.329256", "step": 4080, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-10 02:24:29.363347", "step": 4080, "epoch": 2 }, { "type": "loss", "content": 0.0014254730194807053, "timestamp": "2025-09-10 02:24:29.376636", "step": 4081, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:24:29.408930", "step": 4081, "epoch": 2 }, { "type": "loss", "content": 0.012064416892826557, "timestamp": "2025-09-10 02:24:29.418881", "step": 4082, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:24:29.451248", "step": 4082, "epoch": 2 }, { "type": "loss", "content": 0.006986396852880716, "timestamp": "2025-09-10 02:24:29.458324", "step": 4083, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:24:29.495905", "step": 4083, "epoch": 2 }, { "type": "loss", "content": 0.00028711804770864546, "timestamp": "2025-09-10 02:24:29.530484", "step": 4084, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:24:29.562704", "step": 4084, "epoch": 2 }, { "type": "loss", "content": 0.054849933832883835, "timestamp": "2025-09-10 02:24:29.567305", "step": 4085, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:24:29.596976", "step": 4085, "epoch": 2 }, { "type": "loss", "content": 0.015255759470164776, "timestamp": "2025-09-10 02:24:29.603589", "step": 4086, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:24:29.634971", "step": 4086, "epoch": 2 }, { "type": "loss", "content": 0.00023272530233953148, "timestamp": "2025-09-10 02:24:29.637246", "step": 4087, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:24:29.668012", "step": 4087, "epoch": 2 }, { "type": "loss", "content": 0.0014455585042014718, "timestamp": "2025-09-10 02:24:29.691670", "step": 4088, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:24:29.722453", "step": 4088, "epoch": 2 }, { "type": "loss", "content": 0.08093362301588058, "timestamp": "2025-09-10 02:24:29.726769", "step": 4089, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-10 02:24:29.766804", "step": 4089, "epoch": 2 }, { "type": "loss", "content": 0.05044776201248169, "timestamp": "2025-09-10 02:24:29.782688", "step": 4090, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:24:29.814916", "step": 4090, "epoch": 2 }, { "type": "loss", "content": 0.004336885642260313, "timestamp": "2025-09-10 02:24:29.818884", "step": 4091, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:24:29.850230", "step": 4091, "epoch": 2 }, { "type": "loss", "content": 0.012839260511100292, "timestamp": "2025-09-10 02:24:29.878529", "step": 4092, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:24:29.910753", "step": 4092, "epoch": 2 }, { "type": "loss", "content": 0.0022947373799979687, "timestamp": "2025-09-10 02:24:29.915492", "step": 4093, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:24:29.945821", "step": 4093, "epoch": 2 }, { "type": "loss", "content": 0.00331043335609138, "timestamp": "2025-09-10 02:24:29.949679", "step": 4094, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:24:29.982228", "step": 4094, "epoch": 2 }, { "type": "loss", "content": 0.0018772233743220568, "timestamp": "2025-09-10 02:24:29.988899", "step": 4095, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:24:30.022329", "step": 4095, "epoch": 2 }, { "type": "loss", "content": 0.0015633044531568885, "timestamp": "2025-09-10 02:24:30.056674", "step": 4096, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:24:30.087499", "step": 4096, "epoch": 2 }, { "type": "loss", "content": 0.007612540386617184, "timestamp": "2025-09-10 02:24:30.092839", "step": 4097, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:24:30.124846", "step": 4097, "epoch": 2 }, { "type": "loss", "content": 0.0009734017075970769, "timestamp": "2025-09-10 02:24:30.132521", "step": 4098, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:24:30.167935", "step": 4098, "epoch": 2 }, { "type": "loss", "content": 0.0005414964980445802, "timestamp": "2025-09-10 02:24:30.172166", "step": 4099, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 608 ], "flops": 18035204324480 }, "timestamp": "2025-09-10 02:24:30.224308", "step": 4099, "epoch": 2 }, { "type": "loss", "content": 0.003065018681809306, "timestamp": "2025-09-10 02:24:30.266654", "step": 4100, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:24:30.299544", "step": 4100, "epoch": 2 }, { "type": "loss", "content": 0.0015219785273075104, "timestamp": "2025-09-10 02:24:30.301834", "step": 4101, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:24:30.335591", "step": 4101, "epoch": 2 }, { "type": "loss", "content": 0.0007919540512375534, "timestamp": "2025-09-10 02:24:30.346183", "step": 4102, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:24:30.382001", "step": 4102, "epoch": 2 }, { "type": "loss", "content": 0.0004758323193527758, "timestamp": "2025-09-10 02:24:30.385670", "step": 4103, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:24:30.420886", "step": 4103, "epoch": 2 }, { "type": "loss", "content": 0.001412588288076222, "timestamp": "2025-09-10 02:24:30.445924", "step": 4104, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:24:30.479410", "step": 4104, "epoch": 2 }, { "type": "loss", "content": 0.0007647694437764585, "timestamp": "2025-09-10 02:24:30.481760", "step": 4105, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:24:30.513060", "step": 4105, "epoch": 2 }, { "type": "loss", "content": 0.00031270290492102504, "timestamp": "2025-09-10 02:24:30.515597", "step": 4106, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:24:30.547183", "step": 4106, "epoch": 2 }, { "type": "loss", "content": 0.0012159907491877675, "timestamp": "2025-09-10 02:24:30.549834", "step": 4107, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 448 ], "flops": 13289167064320 }, "timestamp": "2025-09-10 02:24:30.590714", "step": 4107, "epoch": 2 }, { "type": "loss", "content": 0.001639689551666379, "timestamp": "2025-09-10 02:24:30.627898", "step": 4108, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:24:30.661074", "step": 4108, "epoch": 2 }, { "type": "loss", "content": 0.0044628409668803215, "timestamp": "2025-09-10 02:24:30.674061", "step": 4109, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:24:30.705416", "step": 4109, "epoch": 2 }, { "type": "loss", "content": 0.0015483727911487222, "timestamp": "2025-09-10 02:24:30.715405", "step": 4110, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:24:30.746478", "step": 4110, "epoch": 2 }, { "type": "loss", "content": 0.012858943082392216, "timestamp": "2025-09-10 02:24:30.756300", "step": 4111, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:24:30.788159", "step": 4111, "epoch": 2 }, { "type": "loss", "content": 0.0005279368488118052, "timestamp": "2025-09-10 02:24:30.816741", "step": 4112, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:24:30.848252", "step": 4112, "epoch": 2 }, { "type": "loss", "content": 0.001972366590052843, "timestamp": "2025-09-10 02:24:30.852943", "step": 4113, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:24:30.884524", "step": 4113, "epoch": 2 }, { "type": "loss", "content": 0.0004011372511740774, "timestamp": "2025-09-10 02:24:30.891184", "step": 4114, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:24:30.922970", "step": 4114, "epoch": 2 }, { "type": "loss", "content": 0.0024985368363559246, "timestamp": "2025-09-10 02:24:30.926771", "step": 4115, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:24:30.957507", "step": 4115, "epoch": 2 }, { "type": "loss", "content": 0.0012376006925478578, "timestamp": "2025-09-10 02:24:30.982801", "step": 4116, "epoch": 2 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 736 ], "batch_size": 8, "flops": 14554433988352 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 480 ], "batch_size": 8, "flops": 9492022189824 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 544 ], "batch_size": 8, "flops": 10757625139456 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 624 ], "batch_size": 8, "flops": 12339628826496 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 576 ], "batch_size": 8, "flops": 11390426614272 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 432 ], "batch_size": 8, "flops": 8542819977600 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 432 ], "batch_size": 8, "flops": 8542819977600 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 128 ], "batch_size": 8, "flops": 2531205966848 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 544 ], "batch_size": 8, "flops": 10757625139456 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 656 ], "batch_size": 8, "flops": 12972430301312 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 128 ], "batch_size": 8, "flops": 2531205966848 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 464 ], "batch_size": 8, "flops": 9175621452416 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 560 ], "batch_size": 8, "flops": 11074025876864 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 576 ], "batch_size": 8, "flops": 11390426614272 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 1168 ], "batch_size": 8, "flops": 23097253898368 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 640 ], "batch_size": 8, "flops": 12656029563904 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 6, 208 ], "batch_size": 8, "flops": 4113209653888 } ], "timestamp": "2025-09-10 02:24:41.499678", "step": 4116, "epoch": 2 }, { "type": "pplx", "content": 19906806.935294818, "timestamp": "2025-09-10 02:24:41.520920", "step": 4116, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:24:41.580625", "step": 4116, "epoch": 2 }, { "type": "loss", "content": 0.0007578267832286656, "timestamp": "2025-09-10 02:24:41.597958", "step": 4117, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:24:41.653595", "step": 4117, "epoch": 2 }, { "type": "loss", "content": 0.0012618020409718156, "timestamp": "2025-09-10 02:24:41.657564", "step": 4118, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:24:41.698161", "step": 4118, "epoch": 2 }, { "type": "loss", "content": 0.0005286371451802552, "timestamp": "2025-09-10 02:24:41.710022", "step": 4119, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:24:41.789569", "step": 4119, "epoch": 2 }, { "type": "loss", "content": 0.003075662301853299, "timestamp": "2025-09-10 02:24:41.817807", "step": 4120, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:24:41.903531", "step": 4120, "epoch": 2 }, { "type": "loss", "content": 0.0006497717113234103, "timestamp": "2025-09-10 02:24:41.921573", "step": 4121, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:24:42.001125", "step": 4121, "epoch": 2 }, { "type": "loss", "content": 0.0038573991041630507, "timestamp": "2025-09-10 02:24:42.018713", "step": 4122, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:24:42.096979", "step": 4122, "epoch": 2 }, { "type": "loss", "content": 0.003996575251221657, "timestamp": "2025-09-10 02:24:42.105445", "step": 4123, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:24:42.150019", "step": 4123, "epoch": 2 }, { "type": "loss", "content": 0.0023050843738019466, "timestamp": "2025-09-10 02:24:42.180754", "step": 4124, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:24:42.215093", "step": 4124, "epoch": 2 }, { "type": "loss", "content": 0.0015655980678275228, "timestamp": "2025-09-10 02:24:42.218567", "step": 4125, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:24:42.250888", "step": 4125, "epoch": 2 }, { "type": "loss", "content": 0.003783722873777151, "timestamp": "2025-09-10 02:24:42.258287", "step": 4126, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:24:42.292363", "step": 4126, "epoch": 2 }, { "type": "loss", "content": 0.000846231181640178, "timestamp": "2025-09-10 02:24:42.298745", "step": 4127, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:24:42.330802", "step": 4127, "epoch": 2 }, { "type": "loss", "content": 0.0018645375967025757, "timestamp": "2025-09-10 02:24:42.358824", "step": 4128, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:24:42.391712", "step": 4128, "epoch": 2 }, { "type": "loss", "content": 0.0006940880557522178, "timestamp": "2025-09-10 02:24:42.395966", "step": 4129, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:24:42.428546", "step": 4129, "epoch": 2 }, { "type": "loss", "content": 0.001143784262239933, "timestamp": "2025-09-10 02:24:42.439938", "step": 4130, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:24:42.472405", "step": 4130, "epoch": 2 }, { "type": "loss", "content": 0.0018830805784091353, "timestamp": "2025-09-10 02:24:42.478601", "step": 4131, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:24:42.510561", "step": 4131, "epoch": 2 }, { "type": "loss", "content": 0.0007161656394600868, "timestamp": "2025-09-10 02:24:42.539023", "step": 4132, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:24:42.570242", "step": 4132, "epoch": 2 }, { "type": "loss", "content": 0.0023870845325291157, "timestamp": "2025-09-10 02:24:42.574528", "step": 4133, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:24:42.607954", "step": 4133, "epoch": 2 }, { "type": "loss", "content": 0.01680189184844494, "timestamp": "2025-09-10 02:24:42.621289", "step": 4134, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:24:42.652852", "step": 4134, "epoch": 2 }, { "type": "loss", "content": 0.0025117939803749323, "timestamp": "2025-09-10 02:24:42.663506", "step": 4135, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:24:42.694081", "step": 4135, "epoch": 2 }, { "type": "loss", "content": 0.01693909242749214, "timestamp": "2025-09-10 02:24:42.717805", "step": 4136, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:24:42.749274", "step": 4136, "epoch": 2 }, { "type": "loss", "content": 0.0008668032241985202, "timestamp": "2025-09-10 02:24:42.757944", "step": 4137, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-10 02:24:42.794806", "step": 4137, "epoch": 2 }, { "type": "loss", "content": 0.0012921657180413604, "timestamp": "2025-09-10 02:24:42.808844", "step": 4138, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:24:42.841435", "step": 4138, "epoch": 2 }, { "type": "loss", "content": 0.0010435592848807573, "timestamp": "2025-09-10 02:24:42.851930", "step": 4139, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:24:42.883699", "step": 4139, "epoch": 2 }, { "type": "loss", "content": 0.005313398782163858, "timestamp": "2025-09-10 02:24:42.908438", "step": 4140, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-10 02:24:42.945631", "step": 4140, "epoch": 2 }, { "type": "loss", "content": 0.009342093952000141, "timestamp": "2025-09-10 02:24:42.960811", "step": 4141, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:24:42.992178", "step": 4141, "epoch": 2 }, { "type": "loss", "content": 0.0005205129855312407, "timestamp": "2025-09-10 02:24:43.003346", "step": 4142, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:24:43.033595", "step": 4142, "epoch": 2 }, { "type": "loss", "content": 0.0024474586825817823, "timestamp": "2025-09-10 02:24:43.036392", "step": 4143, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:24:43.067860", "step": 4143, "epoch": 2 }, { "type": "loss", "content": 0.00045305112143978477, "timestamp": "2025-09-10 02:24:43.095054", "step": 4144, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 128 ], "flops": 3797092544000 }, "timestamp": "2025-09-10 02:24:43.125624", "step": 4144, "epoch": 2 }, { "type": "loss", "content": 0.0028814957477152348, "timestamp": "2025-09-10 02:24:43.127838", "step": 4145, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:24:43.158326", "step": 4145, "epoch": 2 }, { "type": "loss", "content": 0.000609158945735544, "timestamp": "2025-09-10 02:24:43.165151", "step": 4146, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:24:43.195363", "step": 4146, "epoch": 2 }, { "type": "loss", "content": 0.0028953743167221546, "timestamp": "2025-09-10 02:24:43.199436", "step": 4147, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-10 02:24:43.237345", "step": 4147, "epoch": 2 }, { "type": "loss", "content": 0.0039150347001850605, "timestamp": "2025-09-10 02:24:43.273804", "step": 4148, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:24:43.304696", "step": 4148, "epoch": 2 }, { "type": "loss", "content": 0.001472481875680387, "timestamp": "2025-09-10 02:24:43.312523", "step": 4149, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-10 02:24:43.348228", "step": 4149, "epoch": 2 }, { "type": "loss", "content": 0.0046607027761638165, "timestamp": "2025-09-10 02:24:43.362171", "step": 4150, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:24:43.392806", "step": 4150, "epoch": 2 }, { "type": "loss", "content": 0.005251821596175432, "timestamp": "2025-09-10 02:24:43.399769", "step": 4151, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:24:43.431584", "step": 4151, "epoch": 2 }, { "type": "loss", "content": 0.0006825706223025918, "timestamp": "2025-09-10 02:24:43.456710", "step": 4152, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:24:43.486943", "step": 4152, "epoch": 2 }, { "type": "loss", "content": 0.008997195400297642, "timestamp": "2025-09-10 02:24:43.489487", "step": 4153, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:24:43.520251", "step": 4153, "epoch": 2 }, { "type": "loss", "content": 0.00021450709027703851, "timestamp": "2025-09-10 02:24:43.524736", "step": 4154, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:24:43.556031", "step": 4154, "epoch": 2 }, { "type": "loss", "content": 0.0032714589033275843, "timestamp": "2025-09-10 02:24:43.563521", "step": 4155, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:24:43.598714", "step": 4155, "epoch": 2 }, { "type": "loss", "content": 0.0018134496640414, "timestamp": "2025-09-10 02:24:43.626447", "step": 4156, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:24:43.661551", "step": 4156, "epoch": 2 }, { "type": "loss", "content": 0.0035523748956620693, "timestamp": "2025-09-10 02:24:43.671258", "step": 4157, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:24:43.711589", "step": 4157, "epoch": 2 }, { "type": "loss", "content": 0.026274150237441063, "timestamp": "2025-09-10 02:24:43.724961", "step": 4158, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:24:43.759358", "step": 4158, "epoch": 2 }, { "type": "loss", "content": 0.0005512969219125807, "timestamp": "2025-09-10 02:24:43.772658", "step": 4159, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:24:43.803614", "step": 4159, "epoch": 2 }, { "type": "loss", "content": 0.00017312598356511444, "timestamp": "2025-09-10 02:24:43.827260", "step": 4160, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:24:43.857429", "step": 4160, "epoch": 2 }, { "type": "loss", "content": 0.0010247546015307307, "timestamp": "2025-09-10 02:24:43.860571", "step": 4161, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:24:43.890887", "step": 4161, "epoch": 2 }, { "type": "loss", "content": 0.02129758708178997, "timestamp": "2025-09-10 02:24:43.895411", "step": 4162, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:24:43.929362", "step": 4162, "epoch": 2 }, { "type": "loss", "content": 0.00029126249137334526, "timestamp": "2025-09-10 02:24:43.938239", "step": 4163, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:24:43.981123", "step": 4163, "epoch": 2 }, { "type": "loss", "content": 0.000328995258314535, "timestamp": "2025-09-10 02:24:44.015235", "step": 4164, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:24:44.045898", "step": 4164, "epoch": 2 }, { "type": "loss", "content": 0.00022505922242999077, "timestamp": "2025-09-10 02:24:44.047935", "step": 4165, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:24:44.079952", "step": 4165, "epoch": 2 }, { "type": "loss", "content": 0.0010929142590612173, "timestamp": "2025-09-10 02:24:44.090664", "step": 4166, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:24:44.122893", "step": 4166, "epoch": 2 }, { "type": "loss", "content": 0.000381884427042678, "timestamp": "2025-09-10 02:24:44.132234", "step": 4167, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:24:44.164070", "step": 4167, "epoch": 2 }, { "type": "loss", "content": 0.008006826043128967, "timestamp": "2025-09-10 02:24:44.188101", "step": 4168, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:24:44.219173", "step": 4168, "epoch": 2 }, { "type": "loss", "content": 0.0003847822081297636, "timestamp": "2025-09-10 02:24:44.228953", "step": 4169, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:24:44.259640", "step": 4169, "epoch": 2 }, { "type": "loss", "content": 0.002151952590793371, "timestamp": "2025-09-10 02:24:44.266362", "step": 4170, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:24:44.296706", "step": 4170, "epoch": 2 }, { "type": "loss", "content": 0.0007263789302669466, "timestamp": "2025-09-10 02:24:44.306932", "step": 4171, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:24:44.337993", "step": 4171, "epoch": 2 }, { "type": "loss", "content": 0.014661334455013275, "timestamp": "2025-09-10 02:24:44.370694", "step": 4172, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 144 ], "flops": 4271696270016 }, "timestamp": "2025-09-10 02:24:44.401986", "step": 4172, "epoch": 2 }, { "type": "loss", "content": 0.001283987076021731, "timestamp": "2025-09-10 02:24:44.407582", "step": 4173, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:24:44.442184", "step": 4173, "epoch": 2 }, { "type": "loss", "content": 0.0033758750651031733, "timestamp": "2025-09-10 02:24:44.455922", "step": 4174, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:24:44.499085", "step": 4174, "epoch": 2 }, { "type": "loss", "content": 0.0006651729927398264, "timestamp": "2025-09-10 02:24:44.508438", "step": 4175, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:24:44.552371", "step": 4175, "epoch": 2 }, { "type": "loss", "content": 0.022680295631289482, "timestamp": "2025-09-10 02:24:44.579998", "step": 4176, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:24:44.621199", "step": 4176, "epoch": 2 }, { "type": "loss", "content": 0.005751411896198988, "timestamp": "2025-09-10 02:24:44.629067", "step": 4177, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:24:44.675082", "step": 4177, "epoch": 2 }, { "type": "loss", "content": 0.00028175374609418213, "timestamp": "2025-09-10 02:24:44.681997", "step": 4178, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-10 02:24:44.728441", "step": 4178, "epoch": 2 }, { "type": "loss", "content": 0.00025079899933189154, "timestamp": "2025-09-10 02:24:44.742394", "step": 4179, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:24:44.781232", "step": 4179, "epoch": 2 }, { "type": "loss", "content": 0.002579400083050132, "timestamp": "2025-09-10 02:24:44.808965", "step": 4180, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:24:44.848373", "step": 4180, "epoch": 2 }, { "type": "loss", "content": 0.0009939942974597216, "timestamp": "2025-09-10 02:24:44.858729", "step": 4181, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:24:44.892789", "step": 4181, "epoch": 2 }, { "type": "loss", "content": 0.00024698293418623507, "timestamp": "2025-09-10 02:24:44.899796", "step": 4182, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:24:44.933836", "step": 4182, "epoch": 2 }, { "type": "loss", "content": 0.015613092109560966, "timestamp": "2025-09-10 02:24:44.941548", "step": 4183, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:24:44.974017", "step": 4183, "epoch": 2 }, { "type": "loss", "content": 0.0022488494869321585, "timestamp": "2025-09-10 02:24:45.001793", "step": 4184, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 144 ], "flops": 4271696270016 }, "timestamp": "2025-09-10 02:24:45.035497", "step": 4184, "epoch": 2 }, { "type": "loss", "content": 0.0015916310949251056, "timestamp": "2025-09-10 02:24:45.037589", "step": 4185, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:24:45.068522", "step": 4185, "epoch": 2 }, { "type": "loss", "content": 0.0012442750157788396, "timestamp": "2025-09-10 02:24:45.075681", "step": 4186, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:24:45.107142", "step": 4186, "epoch": 2 }, { "type": "loss", "content": 0.00012972517288289964, "timestamp": "2025-09-10 02:24:45.117217", "step": 4187, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:24:45.148182", "step": 4187, "epoch": 2 }, { "type": "loss", "content": 0.00011532863572938368, "timestamp": "2025-09-10 02:24:45.175919", "step": 4188, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:24:45.207610", "step": 4188, "epoch": 2 }, { "type": "loss", "content": 0.0004722306621260941, "timestamp": "2025-09-10 02:24:45.209939", "step": 4189, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:24:45.240550", "step": 4189, "epoch": 2 }, { "type": "loss", "content": 0.0006129414541646838, "timestamp": "2025-09-10 02:24:45.244713", "step": 4190, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:24:45.277997", "step": 4190, "epoch": 2 }, { "type": "loss", "content": 0.016751401126384735, "timestamp": "2025-09-10 02:24:45.285710", "step": 4191, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:24:45.317813", "step": 4191, "epoch": 2 }, { "type": "loss", "content": 0.0004313217068556696, "timestamp": "2025-09-10 02:24:45.346485", "step": 4192, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:24:45.379053", "step": 4192, "epoch": 2 }, { "type": "loss", "content": 0.0681036114692688, "timestamp": "2025-09-10 02:24:45.387056", "step": 4193, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:24:45.422109", "step": 4193, "epoch": 2 }, { "type": "loss", "content": 0.0021573789417743683, "timestamp": "2025-09-10 02:24:45.429574", "step": 4194, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:24:45.462679", "step": 4194, "epoch": 2 }, { "type": "loss", "content": 0.00015258920029737055, "timestamp": "2025-09-10 02:24:45.474668", "step": 4195, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:24:45.505846", "step": 4195, "epoch": 2 }, { "type": "loss", "content": 0.0043427967466413975, "timestamp": "2025-09-10 02:24:45.533466", "step": 4196, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:24:45.564991", "step": 4196, "epoch": 2 }, { "type": "loss", "content": 6.901784217916429e-05, "timestamp": "2025-09-10 02:24:45.574286", "step": 4197, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:24:45.605121", "step": 4197, "epoch": 2 }, { "type": "loss", "content": 0.006661687511950731, "timestamp": "2025-09-10 02:24:45.609203", "step": 4198, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:24:45.642559", "step": 4198, "epoch": 2 }, { "type": "loss", "content": 0.00025688271853141487, "timestamp": "2025-09-10 02:24:45.650157", "step": 4199, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:24:45.681105", "step": 4199, "epoch": 2 }, { "type": "loss", "content": 0.010232685133814812, "timestamp": "2025-09-10 02:24:45.709890", "step": 4200, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:24:45.743640", "step": 4200, "epoch": 2 }, { "type": "loss", "content": 0.017913201823830605, "timestamp": "2025-09-10 02:24:45.749224", "step": 4201, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:24:45.783408", "step": 4201, "epoch": 2 }, { "type": "loss", "content": 0.000634572294075042, "timestamp": "2025-09-10 02:24:45.790563", "step": 4202, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:24:45.823229", "step": 4202, "epoch": 2 }, { "type": "loss", "content": 0.001113194739446044, "timestamp": "2025-09-10 02:24:45.830989", "step": 4203, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:24:45.862907", "step": 4203, "epoch": 2 }, { "type": "loss", "content": 4.078313577338122e-05, "timestamp": "2025-09-10 02:24:45.894873", "step": 4204, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:24:45.927700", "step": 4204, "epoch": 2 }, { "type": "loss", "content": 7.009686669334769e-05, "timestamp": "2025-09-10 02:24:45.940748", "step": 4205, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:24:45.971903", "step": 4205, "epoch": 2 }, { "type": "loss", "content": 0.013100274838507175, "timestamp": "2025-09-10 02:24:45.978878", "step": 4206, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:24:46.010401", "step": 4206, "epoch": 2 }, { "type": "loss", "content": 0.016267575323581696, "timestamp": "2025-09-10 02:24:46.017892", "step": 4207, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:24:46.049063", "step": 4207, "epoch": 2 }, { "type": "loss", "content": 0.00022457198065239936, "timestamp": "2025-09-10 02:24:46.077377", "step": 4208, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-10 02:24:46.115376", "step": 4208, "epoch": 2 }, { "type": "loss", "content": 0.0004984191036783159, "timestamp": "2025-09-10 02:24:46.130853", "step": 4209, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 848 ], "flops": 25154260214720 }, "timestamp": "2025-09-10 02:24:46.202347", "step": 4209, "epoch": 2 }, { "type": "loss", "content": 0.0003745494468603283, "timestamp": "2025-09-10 02:24:46.231818", "step": 4210, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:24:46.263380", "step": 4210, "epoch": 2 }, { "type": "loss", "content": 0.00043498026207089424, "timestamp": "2025-09-10 02:24:46.275916", "step": 4211, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:24:46.306859", "step": 4211, "epoch": 2 }, { "type": "loss", "content": 0.00025909944088198245, "timestamp": "2025-09-10 02:24:46.331627", "step": 4212, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:24:46.362621", "step": 4212, "epoch": 2 }, { "type": "loss", "content": 0.0015551492106169462, "timestamp": "2025-09-10 02:24:46.365636", "step": 4213, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:24:46.396632", "step": 4213, "epoch": 2 }, { "type": "loss", "content": 0.0015605135122314095, "timestamp": "2025-09-10 02:24:46.406815", "step": 4214, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:24:46.440241", "step": 4214, "epoch": 2 }, { "type": "loss", "content": 0.020354004576802254, "timestamp": "2025-09-10 02:24:46.447335", "step": 4215, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:24:46.481626", "step": 4215, "epoch": 2 }, { "type": "loss", "content": 0.033257655799388885, "timestamp": "2025-09-10 02:24:46.516215", "step": 4216, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-10 02:24:46.549841", "step": 4216, "epoch": 2 }, { "type": "loss", "content": 0.0032047501299530268, "timestamp": "2025-09-10 02:24:46.563141", "step": 4217, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:24:46.595764", "step": 4217, "epoch": 2 }, { "type": "loss", "content": 0.0013879657490178943, "timestamp": "2025-09-10 02:24:46.600157", "step": 4218, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 512 ], "flops": 15187581968384 }, "timestamp": "2025-09-10 02:24:46.643464", "step": 4218, "epoch": 2 }, { "type": "loss", "content": 0.00234273006208241, "timestamp": "2025-09-10 02:24:46.661172", "step": 4219, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 432 ], "flops": 12814563338304 }, "timestamp": "2025-09-10 02:24:46.702414", "step": 4219, "epoch": 2 }, { "type": "loss", "content": 0.0005795766483061016, "timestamp": "2025-09-10 02:24:46.739479", "step": 4220, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:24:46.770239", "step": 4220, "epoch": 2 }, { "type": "loss", "content": 0.0004290399665478617, "timestamp": "2025-09-10 02:24:46.772483", "step": 4221, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:24:46.803784", "step": 4221, "epoch": 2 }, { "type": "loss", "content": 0.017445342615246773, "timestamp": "2025-09-10 02:24:46.816337", "step": 4222, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:24:46.847245", "step": 4222, "epoch": 2 }, { "type": "loss", "content": 0.0006286733550950885, "timestamp": "2025-09-10 02:24:46.855283", "step": 4223, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:24:46.886289", "step": 4223, "epoch": 2 }, { "type": "loss", "content": 0.0007370785460807383, "timestamp": "2025-09-10 02:24:46.918074", "step": 4224, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:24:46.948655", "step": 4224, "epoch": 2 }, { "type": "loss", "content": 0.004575326107442379, "timestamp": "2025-09-10 02:24:46.957270", "step": 4225, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:24:46.988637", "step": 4225, "epoch": 2 }, { "type": "loss", "content": 0.0020077417138963938, "timestamp": "2025-09-10 02:24:46.995633", "step": 4226, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:24:47.032864", "step": 4226, "epoch": 2 }, { "type": "loss", "content": 0.001171283540315926, "timestamp": "2025-09-10 02:24:47.046226", "step": 4227, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:24:47.077882", "step": 4227, "epoch": 2 }, { "type": "loss", "content": 0.00035053075407631695, "timestamp": "2025-09-10 02:24:47.105616", "step": 4228, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:24:47.136916", "step": 4228, "epoch": 2 }, { "type": "loss", "content": 0.0004195565707050264, "timestamp": "2025-09-10 02:24:47.144823", "step": 4229, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:24:47.176989", "step": 4229, "epoch": 2 }, { "type": "loss", "content": 0.0031623467803001404, "timestamp": "2025-09-10 02:24:47.183791", "step": 4230, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 128 ], "flops": 3797092544000 }, "timestamp": "2025-09-10 02:24:47.215231", "step": 4230, "epoch": 2 }, { "type": "loss", "content": 0.00927420798689127, "timestamp": "2025-09-10 02:24:47.217461", "step": 4231, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-10 02:24:47.256876", "step": 4231, "epoch": 2 }, { "type": "loss", "content": 0.00882531888782978, "timestamp": "2025-09-10 02:24:47.293361", "step": 4232, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:24:47.324758", "step": 4232, "epoch": 2 }, { "type": "loss", "content": 0.0013175641652196646, "timestamp": "2025-09-10 02:24:47.327064", "step": 4233, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:24:47.360596", "step": 4233, "epoch": 2 }, { "type": "loss", "content": 0.02007768489420414, "timestamp": "2025-09-10 02:24:47.373186", "step": 4234, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:24:47.405075", "step": 4234, "epoch": 2 }, { "type": "loss", "content": 0.014163470827043056, "timestamp": "2025-09-10 02:24:47.415328", "step": 4235, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:24:47.446112", "step": 4235, "epoch": 2 }, { "type": "loss", "content": 0.02005593292415142, "timestamp": "2025-09-10 02:24:47.474100", "step": 4236, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-10 02:24:47.510982", "step": 4236, "epoch": 2 }, { "type": "loss", "content": 0.0009125882061198354, "timestamp": "2025-09-10 02:24:47.526175", "step": 4237, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:24:47.557015", "step": 4237, "epoch": 2 }, { "type": "loss", "content": 0.01142832636833191, "timestamp": "2025-09-10 02:24:47.564834", "step": 4238, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:24:47.595788", "step": 4238, "epoch": 2 }, { "type": "loss", "content": 0.018545877188444138, "timestamp": "2025-09-10 02:24:47.603091", "step": 4239, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:24:47.637638", "step": 4239, "epoch": 2 }, { "type": "loss", "content": 0.0013059125049039721, "timestamp": "2025-09-10 02:24:47.672262", "step": 4240, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:24:47.704270", "step": 4240, "epoch": 2 }, { "type": "loss", "content": 0.0013210356701165438, "timestamp": "2025-09-10 02:24:47.708942", "step": 4241, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:24:47.739973", "step": 4241, "epoch": 2 }, { "type": "loss", "content": 0.014471757225692272, "timestamp": "2025-09-10 02:24:47.746657", "step": 4242, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:24:47.778557", "step": 4242, "epoch": 2 }, { "type": "loss", "content": 0.006982952821999788, "timestamp": "2025-09-10 02:24:47.791126", "step": 4243, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:24:47.822714", "step": 4243, "epoch": 2 }, { "type": "loss", "content": 0.0017880608793348074, "timestamp": "2025-09-10 02:24:47.850346", "step": 4244, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:24:47.880878", "step": 4244, "epoch": 2 }, { "type": "loss", "content": 0.005929925944656134, "timestamp": "2025-09-10 02:24:47.886066", "step": 4245, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:24:47.917504", "step": 4245, "epoch": 2 }, { "type": "loss", "content": 0.004261931870132685, "timestamp": "2025-09-10 02:24:47.925253", "step": 4246, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:24:47.955408", "step": 4246, "epoch": 2 }, { "type": "loss", "content": 0.0011088837636634707, "timestamp": "2025-09-10 02:24:47.962565", "step": 4247, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:24:48.013895", "step": 4247, "epoch": 2 }, { "type": "loss", "content": 0.009494653902947903, "timestamp": "2025-09-10 02:24:48.042624", "step": 4248, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:24:48.072576", "step": 4248, "epoch": 2 }, { "type": "loss", "content": 0.0014409434515982866, "timestamp": "2025-09-10 02:24:48.077188", "step": 4249, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:24:48.109097", "step": 4249, "epoch": 2 }, { "type": "loss", "content": 0.0007126140990294516, "timestamp": "2025-09-10 02:24:48.116845", "step": 4250, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 656 ], "flops": 19459015502528 }, "timestamp": "2025-09-10 02:24:48.171651", "step": 4250, "epoch": 2 }, { "type": "loss", "content": 0.0010598188964650035, "timestamp": "2025-09-10 02:24:48.195085", "step": 4251, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:24:48.230399", "step": 4251, "epoch": 2 }, { "type": "loss", "content": 0.0014534889487549663, "timestamp": "2025-09-10 02:24:48.261522", "step": 4252, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:24:48.293468", "step": 4252, "epoch": 2 }, { "type": "loss", "content": 0.0003932244435418397, "timestamp": "2025-09-10 02:24:48.297886", "step": 4253, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-10 02:24:48.336730", "step": 4253, "epoch": 2 }, { "type": "loss", "content": 0.0024409524630755186, "timestamp": "2025-09-10 02:24:48.352588", "step": 4254, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:24:48.383837", "step": 4254, "epoch": 2 }, { "type": "loss", "content": 0.00022083787189330906, "timestamp": "2025-09-10 02:24:48.390767", "step": 4255, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:24:48.423234", "step": 4255, "epoch": 2 }, { "type": "loss", "content": 0.004927542991936207, "timestamp": "2025-09-10 02:24:48.451762", "step": 4256, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:24:48.484907", "step": 4256, "epoch": 2 }, { "type": "loss", "content": 0.0016823627520352602, "timestamp": "2025-09-10 02:24:48.494669", "step": 4257, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-10 02:24:48.530118", "step": 4257, "epoch": 2 }, { "type": "loss", "content": 0.00010247322643408552, "timestamp": "2025-09-10 02:24:48.543899", "step": 4258, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:24:48.575265", "step": 4258, "epoch": 2 }, { "type": "loss", "content": 0.0012629638658836484, "timestamp": "2025-09-10 02:24:48.587583", "step": 4259, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:24:48.620934", "step": 4259, "epoch": 2 }, { "type": "loss", "content": 0.002863981993868947, "timestamp": "2025-09-10 02:24:48.652075", "step": 4260, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:24:48.685050", "step": 4260, "epoch": 2 }, { "type": "loss", "content": 0.00011709488171618432, "timestamp": "2025-09-10 02:24:48.698047", "step": 4261, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:24:48.728803", "step": 4261, "epoch": 2 }, { "type": "loss", "content": 0.0035543248523026705, "timestamp": "2025-09-10 02:24:48.736430", "step": 4262, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:24:48.768966", "step": 4262, "epoch": 2 }, { "type": "loss", "content": 0.0005866262363269925, "timestamp": "2025-09-10 02:24:48.773009", "step": 4263, "epoch": 2 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 736 ], "batch_size": 8, "flops": 14554433988352 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 480 ], "batch_size": 8, "flops": 9492022189824 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 544 ], "batch_size": 8, "flops": 10757625139456 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 624 ], "batch_size": 8, "flops": 12339628826496 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 576 ], "batch_size": 8, "flops": 11390426614272 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 432 ], "batch_size": 8, "flops": 8542819977600 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 432 ], "batch_size": 8, "flops": 8542819977600 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 128 ], "batch_size": 8, "flops": 2531205966848 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 544 ], "batch_size": 8, "flops": 10757625139456 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 656 ], "batch_size": 8, "flops": 12972430301312 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 128 ], "batch_size": 8, "flops": 2531205966848 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 464 ], "batch_size": 8, "flops": 9175621452416 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 560 ], "batch_size": 8, "flops": 11074025876864 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 576 ], "batch_size": 8, "flops": 11390426614272 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 1168 ], "batch_size": 8, "flops": 23097253898368 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 640 ], "batch_size": 8, "flops": 12656029563904 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 6, 208 ], "batch_size": 8, "flops": 4113209653888 } ], "timestamp": "2025-09-10 02:24:58.849819", "step": 4263, "epoch": 2 }, { "type": "pplx", "content": 22181812.0487706, "timestamp": "2025-09-10 02:24:58.852988", "step": 4263, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:24:58.883995", "step": 4263, "epoch": 2 }, { "type": "loss", "content": 0.0003768012975342572, "timestamp": "2025-09-10 02:24:58.916781", "step": 4264, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:24:58.950995", "step": 4264, "epoch": 2 }, { "type": "loss", "content": 0.006254100706428289, "timestamp": "2025-09-10 02:24:58.958279", "step": 4265, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 144 ], "flops": 4271696270016 }, "timestamp": "2025-09-10 02:24:58.989706", "step": 4265, "epoch": 2 }, { "type": "loss", "content": 0.0002495805674698204, "timestamp": "2025-09-10 02:24:58.992194", "step": 4266, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:24:59.024254", "step": 4266, "epoch": 2 }, { "type": "loss", "content": 6.783670687582344e-05, "timestamp": "2025-09-10 02:24:59.030786", "step": 4267, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:24:59.062701", "step": 4267, "epoch": 2 }, { "type": "loss", "content": 0.00031304662115871906, "timestamp": "2025-09-10 02:24:59.093777", "step": 4268, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:24:59.126070", "step": 4268, "epoch": 2 }, { "type": "loss", "content": 0.0011215128470212221, "timestamp": "2025-09-10 02:24:59.138732", "step": 4269, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:24:59.171123", "step": 4269, "epoch": 2 }, { "type": "loss", "content": 0.00010794185072882101, "timestamp": "2025-09-10 02:24:59.179029", "step": 4270, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:24:59.209624", "step": 4270, "epoch": 2 }, { "type": "loss", "content": 0.01281669456511736, "timestamp": "2025-09-10 02:24:59.213646", "step": 4271, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:24:59.245430", "step": 4271, "epoch": 2 }, { "type": "loss", "content": 0.00017699485761113465, "timestamp": "2025-09-10 02:24:59.273229", "step": 4272, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:24:59.307346", "step": 4272, "epoch": 2 }, { "type": "loss", "content": 0.007482998538762331, "timestamp": "2025-09-10 02:24:59.320324", "step": 4273, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:24:59.351732", "step": 4273, "epoch": 2 }, { "type": "loss", "content": 0.002870141062885523, "timestamp": "2025-09-10 02:24:59.355889", "step": 4274, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:24:59.386629", "step": 4274, "epoch": 2 }, { "type": "loss", "content": 0.00027059766580350697, "timestamp": "2025-09-10 02:24:59.393689", "step": 4275, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:24:59.424586", "step": 4275, "epoch": 2 }, { "type": "loss", "content": 0.0001303361786995083, "timestamp": "2025-09-10 02:24:59.455849", "step": 4276, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:24:59.487777", "step": 4276, "epoch": 2 }, { "type": "loss", "content": 0.0001203405117848888, "timestamp": "2025-09-10 02:24:59.492191", "step": 4277, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:24:59.523578", "step": 4277, "epoch": 2 }, { "type": "loss", "content": 0.0019344912143424153, "timestamp": "2025-09-10 02:24:59.531227", "step": 4278, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-10 02:24:59.568424", "step": 4278, "epoch": 2 }, { "type": "loss", "content": 0.00048476256779395044, "timestamp": "2025-09-10 02:24:59.582376", "step": 4279, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:24:59.613549", "step": 4279, "epoch": 2 }, { "type": "loss", "content": 0.00010040303459390998, "timestamp": "2025-09-10 02:24:59.641773", "step": 4280, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:24:59.673425", "step": 4280, "epoch": 2 }, { "type": "loss", "content": 0.00014088333409745246, "timestamp": "2025-09-10 02:24:59.675811", "step": 4281, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:24:59.707021", "step": 4281, "epoch": 2 }, { "type": "loss", "content": 0.00018304158584214747, "timestamp": "2025-09-10 02:24:59.719352", "step": 4282, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:24:59.751347", "step": 4282, "epoch": 2 }, { "type": "loss", "content": 0.0002209401864092797, "timestamp": "2025-09-10 02:24:59.755552", "step": 4283, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:24:59.787338", "step": 4283, "epoch": 2 }, { "type": "loss", "content": 0.04991947486996651, "timestamp": "2025-09-10 02:24:59.812420", "step": 4284, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:24:59.843487", "step": 4284, "epoch": 2 }, { "type": "loss", "content": 0.0009700055816210806, "timestamp": "2025-09-10 02:24:59.845657", "step": 4285, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:24:59.877995", "step": 4285, "epoch": 2 }, { "type": "loss", "content": 0.0005855086492374539, "timestamp": "2025-09-10 02:24:59.890131", "step": 4286, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:24:59.921077", "step": 4286, "epoch": 2 }, { "type": "loss", "content": 0.032629940658807755, "timestamp": "2025-09-10 02:24:59.933614", "step": 4287, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:24:59.964627", "step": 4287, "epoch": 2 }, { "type": "loss", "content": 8.535251981811598e-05, "timestamp": "2025-09-10 02:24:59.995921", "step": 4288, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:25:00.027327", "step": 4288, "epoch": 2 }, { "type": "loss", "content": 9.947276703314856e-05, "timestamp": "2025-09-10 02:25:00.035463", "step": 4289, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:25:00.071823", "step": 4289, "epoch": 2 }, { "type": "loss", "content": 0.00045155364205129445, "timestamp": "2025-09-10 02:25:00.082136", "step": 4290, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:25:00.113172", "step": 4290, "epoch": 2 }, { "type": "loss", "content": 0.0010928146075457335, "timestamp": "2025-09-10 02:25:00.123319", "step": 4291, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:25:00.154266", "step": 4291, "epoch": 2 }, { "type": "loss", "content": 0.020004166290163994, "timestamp": "2025-09-10 02:25:00.182271", "step": 4292, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:25:00.213350", "step": 4292, "epoch": 2 }, { "type": "loss", "content": 0.00102779152803123, "timestamp": "2025-09-10 02:25:00.218681", "step": 4293, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:25:00.250280", "step": 4293, "epoch": 2 }, { "type": "loss", "content": 0.00010686110181268305, "timestamp": "2025-09-10 02:25:00.262661", "step": 4294, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:25:00.292792", "step": 4294, "epoch": 2 }, { "type": "loss", "content": 0.00020121461420785636, "timestamp": "2025-09-10 02:25:00.305178", "step": 4295, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:25:00.335235", "step": 4295, "epoch": 2 }, { "type": "loss", "content": 0.0013605983695015311, "timestamp": "2025-09-10 02:25:00.363064", "step": 4296, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:25:00.393824", "step": 4296, "epoch": 2 }, { "type": "loss", "content": 0.0013821868924424052, "timestamp": "2025-09-10 02:25:00.398980", "step": 4297, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:25:00.430139", "step": 4297, "epoch": 2 }, { "type": "loss", "content": 3.0794344638707116e-05, "timestamp": "2025-09-10 02:25:00.437218", "step": 4298, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:25:00.468044", "step": 4298, "epoch": 2 }, { "type": "loss", "content": 0.004364358726888895, "timestamp": "2025-09-10 02:25:00.475072", "step": 4299, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:25:00.506555", "step": 4299, "epoch": 2 }, { "type": "loss", "content": 0.0013004717184230685, "timestamp": "2025-09-10 02:25:00.534297", "step": 4300, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:25:00.564881", "step": 4300, "epoch": 2 }, { "type": "loss", "content": 0.002206193981692195, "timestamp": "2025-09-10 02:25:00.572088", "step": 4301, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:25:00.603449", "step": 4301, "epoch": 2 }, { "type": "loss", "content": 5.780989522463642e-05, "timestamp": "2025-09-10 02:25:00.610376", "step": 4302, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:25:00.641640", "step": 4302, "epoch": 2 }, { "type": "loss", "content": 0.0021686165127903223, "timestamp": "2025-09-10 02:25:00.652197", "step": 4303, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:25:00.685700", "step": 4303, "epoch": 2 }, { "type": "loss", "content": 0.0004125793057028204, "timestamp": "2025-09-10 02:25:00.718939", "step": 4304, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:25:00.753236", "step": 4304, "epoch": 2 }, { "type": "loss", "content": 0.04362964630126953, "timestamp": "2025-09-10 02:25:00.758456", "step": 4305, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:25:00.791572", "step": 4305, "epoch": 2 }, { "type": "loss", "content": 0.00016738659178372473, "timestamp": "2025-09-10 02:25:00.795703", "step": 4306, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:25:00.826855", "step": 4306, "epoch": 2 }, { "type": "loss", "content": 0.00039224643842317164, "timestamp": "2025-09-10 02:25:00.833827", "step": 4307, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:25:00.864064", "step": 4307, "epoch": 2 }, { "type": "loss", "content": 0.0002121599536621943, "timestamp": "2025-09-10 02:25:00.889610", "step": 4308, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:25:00.920451", "step": 4308, "epoch": 2 }, { "type": "loss", "content": 0.0006208484992384911, "timestamp": "2025-09-10 02:25:00.926077", "step": 4309, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:25:00.957605", "step": 4309, "epoch": 2 }, { "type": "loss", "content": 0.0001874407462310046, "timestamp": "2025-09-10 02:25:00.967556", "step": 4310, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:25:00.998813", "step": 4310, "epoch": 2 }, { "type": "loss", "content": 3.587496030377224e-05, "timestamp": "2025-09-10 02:25:01.006283", "step": 4311, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:25:01.037457", "step": 4311, "epoch": 2 }, { "type": "loss", "content": 0.001078314846381545, "timestamp": "2025-09-10 02:25:01.068721", "step": 4312, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:25:01.100766", "step": 4312, "epoch": 2 }, { "type": "loss", "content": 0.00012618736946024, "timestamp": "2025-09-10 02:25:01.105848", "step": 4313, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-10 02:25:01.141178", "step": 4313, "epoch": 2 }, { "type": "loss", "content": 0.020358415320515633, "timestamp": "2025-09-10 02:25:01.154952", "step": 4314, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:25:01.189177", "step": 4314, "epoch": 2 }, { "type": "loss", "content": 0.0020916808862239122, "timestamp": "2025-09-10 02:25:01.202463", "step": 4315, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:25:01.233436", "step": 4315, "epoch": 2 }, { "type": "loss", "content": 0.00011025350977433845, "timestamp": "2025-09-10 02:25:01.261997", "step": 4316, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:25:01.294453", "step": 4316, "epoch": 2 }, { "type": "loss", "content": 0.00029134147916920483, "timestamp": "2025-09-10 02:25:01.303664", "step": 4317, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:25:01.334769", "step": 4317, "epoch": 2 }, { "type": "loss", "content": 0.00012424368469510227, "timestamp": "2025-09-10 02:25:01.341911", "step": 4318, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:25:01.373280", "step": 4318, "epoch": 2 }, { "type": "loss", "content": 9.490887896390632e-05, "timestamp": "2025-09-10 02:25:01.381056", "step": 4319, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:25:01.412783", "step": 4319, "epoch": 2 }, { "type": "loss", "content": 0.00033820615499280393, "timestamp": "2025-09-10 02:25:01.440434", "step": 4320, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:25:01.472346", "step": 4320, "epoch": 2 }, { "type": "loss", "content": 0.0010137018980458379, "timestamp": "2025-09-10 02:25:01.479957", "step": 4321, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:25:01.512010", "step": 4321, "epoch": 2 }, { "type": "loss", "content": 0.0029211000073701143, "timestamp": "2025-09-10 02:25:01.519422", "step": 4322, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:25:01.553581", "step": 4322, "epoch": 2 }, { "type": "loss", "content": 0.0003600471536628902, "timestamp": "2025-09-10 02:25:01.557064", "step": 4323, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:25:01.595108", "step": 4323, "epoch": 2 }, { "type": "loss", "content": 0.0003258692449890077, "timestamp": "2025-09-10 02:25:01.628523", "step": 4324, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:25:01.665460", "step": 4324, "epoch": 2 }, { "type": "loss", "content": 0.00018436498066876084, "timestamp": "2025-09-10 02:25:01.669802", "step": 4325, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:25:01.706002", "step": 4325, "epoch": 2 }, { "type": "loss", "content": 0.00016462701023556292, "timestamp": "2025-09-10 02:25:01.712863", "step": 4326, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:25:01.751547", "step": 4326, "epoch": 2 }, { "type": "loss", "content": 0.00015942190657369792, "timestamp": "2025-09-10 02:25:01.758328", "step": 4327, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:25:01.797714", "step": 4327, "epoch": 2 }, { "type": "loss", "content": 0.0009382445714436471, "timestamp": "2025-09-10 02:25:01.821965", "step": 4328, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:25:01.863480", "step": 4328, "epoch": 2 }, { "type": "loss", "content": 0.00022544125386048108, "timestamp": "2025-09-10 02:25:01.873011", "step": 4329, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 144 ], "flops": 4271696270016 }, "timestamp": "2025-09-10 02:25:01.907683", "step": 4329, "epoch": 2 }, { "type": "loss", "content": 0.00011031327449018136, "timestamp": "2025-09-10 02:25:01.911985", "step": 4330, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-10 02:25:01.953697", "step": 4330, "epoch": 2 }, { "type": "loss", "content": 0.014851606450974941, "timestamp": "2025-09-10 02:25:01.967684", "step": 4331, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:25:02.006933", "step": 4331, "epoch": 2 }, { "type": "loss", "content": 7.220734551083297e-05, "timestamp": "2025-09-10 02:25:02.032416", "step": 4332, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:25:02.065054", "step": 4332, "epoch": 2 }, { "type": "loss", "content": 0.00027742632664740086, "timestamp": "2025-09-10 02:25:02.070467", "step": 4333, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 432 ], "flops": 12814563338304 }, "timestamp": "2025-09-10 02:25:02.110243", "step": 4333, "epoch": 2 }, { "type": "loss", "content": 0.00022008584346622229, "timestamp": "2025-09-10 02:25:02.126339", "step": 4334, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:25:02.157983", "step": 4334, "epoch": 2 }, { "type": "loss", "content": 0.02434348128736019, "timestamp": "2025-09-10 02:25:02.165668", "step": 4335, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:25:02.198239", "step": 4335, "epoch": 2 }, { "type": "loss", "content": 6.505424244096503e-05, "timestamp": "2025-09-10 02:25:02.229114", "step": 4336, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:25:02.260400", "step": 4336, "epoch": 2 }, { "type": "loss", "content": 0.0028443282935768366, "timestamp": "2025-09-10 02:25:02.265722", "step": 4337, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:25:02.297700", "step": 4337, "epoch": 2 }, { "type": "loss", "content": 0.021456856280565262, "timestamp": "2025-09-10 02:25:02.305252", "step": 4338, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:25:02.336131", "step": 4338, "epoch": 2 }, { "type": "loss", "content": 0.00022894078574609011, "timestamp": "2025-09-10 02:25:02.338752", "step": 4339, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 448 ], "flops": 13289167064320 }, "timestamp": "2025-09-10 02:25:02.381660", "step": 4339, "epoch": 2 }, { "type": "loss", "content": 0.0002486018347553909, "timestamp": "2025-09-10 02:25:02.418933", "step": 4340, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:25:02.449597", "step": 4340, "epoch": 2 }, { "type": "loss", "content": 0.008721557445824146, "timestamp": "2025-09-10 02:25:02.454124", "step": 4341, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:25:02.484784", "step": 4341, "epoch": 2 }, { "type": "loss", "content": 0.025644836947321892, "timestamp": "2025-09-10 02:25:02.492080", "step": 4342, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:25:02.526057", "step": 4342, "epoch": 2 }, { "type": "loss", "content": 0.002075839089229703, "timestamp": "2025-09-10 02:25:02.530195", "step": 4343, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:25:02.561412", "step": 4343, "epoch": 2 }, { "type": "loss", "content": 0.0144526781514287, "timestamp": "2025-09-10 02:25:02.588953", "step": 4344, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:25:02.619914", "step": 4344, "epoch": 2 }, { "type": "loss", "content": 0.009567998349666595, "timestamp": "2025-09-10 02:25:02.624449", "step": 4345, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:25:02.658395", "step": 4345, "epoch": 2 }, { "type": "loss", "content": 0.0002823833783622831, "timestamp": "2025-09-10 02:25:02.665186", "step": 4346, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:25:02.696805", "step": 4346, "epoch": 2 }, { "type": "loss", "content": 0.005126704927533865, "timestamp": "2025-09-10 02:25:02.706668", "step": 4347, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:25:02.737506", "step": 4347, "epoch": 2 }, { "type": "loss", "content": 6.73050744808279e-05, "timestamp": "2025-09-10 02:25:02.765447", "step": 4348, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:25:02.797360", "step": 4348, "epoch": 2 }, { "type": "loss", "content": 0.0014881890965625644, "timestamp": "2025-09-10 02:25:02.807494", "step": 4349, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:25:02.838380", "step": 4349, "epoch": 2 }, { "type": "loss", "content": 0.0008507216116413474, "timestamp": "2025-09-10 02:25:02.842906", "step": 4350, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:25:02.877739", "step": 4350, "epoch": 2 }, { "type": "loss", "content": 0.00017134180234279484, "timestamp": "2025-09-10 02:25:02.884535", "step": 4351, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:25:02.915481", "step": 4351, "epoch": 2 }, { "type": "loss", "content": 0.008143614046275616, "timestamp": "2025-09-10 02:25:02.943789", "step": 4352, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:25:02.974733", "step": 4352, "epoch": 2 }, { "type": "loss", "content": 0.005178903229534626, "timestamp": "2025-09-10 02:25:02.977062", "step": 4353, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:25:03.010208", "step": 4353, "epoch": 2 }, { "type": "loss", "content": 0.0012402004795148969, "timestamp": "2025-09-10 02:25:03.018067", "step": 4354, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:25:03.048968", "step": 4354, "epoch": 2 }, { "type": "loss", "content": 0.00033361284295096993, "timestamp": "2025-09-10 02:25:03.059247", "step": 4355, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:25:03.096653", "step": 4355, "epoch": 2 }, { "type": "loss", "content": 9.43372942856513e-05, "timestamp": "2025-09-10 02:25:03.122014", "step": 4356, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:25:03.152986", "step": 4356, "epoch": 2 }, { "type": "loss", "content": 0.010012110695242882, "timestamp": "2025-09-10 02:25:03.162705", "step": 4357, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:25:03.194345", "step": 4357, "epoch": 2 }, { "type": "loss", "content": 0.0008857456268742681, "timestamp": "2025-09-10 02:25:03.206853", "step": 4358, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:25:03.238672", "step": 4358, "epoch": 2 }, { "type": "loss", "content": 0.0003111858095508069, "timestamp": "2025-09-10 02:25:03.242498", "step": 4359, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:25:03.273121", "step": 4359, "epoch": 2 }, { "type": "loss", "content": 0.0002316091413376853, "timestamp": "2025-09-10 02:25:03.298351", "step": 4360, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:25:03.329540", "step": 4360, "epoch": 2 }, { "type": "loss", "content": 0.009092413820326328, "timestamp": "2025-09-10 02:25:03.334165", "step": 4361, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:25:03.365369", "step": 4361, "epoch": 2 }, { "type": "loss", "content": 0.00018735427875071764, "timestamp": "2025-09-10 02:25:03.377920", "step": 4362, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:25:03.409909", "step": 4362, "epoch": 2 }, { "type": "loss", "content": 0.026521209627389908, "timestamp": "2025-09-10 02:25:03.416748", "step": 4363, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:25:03.447157", "step": 4363, "epoch": 2 }, { "type": "loss", "content": 0.000702383928000927, "timestamp": "2025-09-10 02:25:03.472725", "step": 4364, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:25:03.504432", "step": 4364, "epoch": 2 }, { "type": "loss", "content": 0.02729278802871704, "timestamp": "2025-09-10 02:25:03.513002", "step": 4365, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:25:03.544765", "step": 4365, "epoch": 2 }, { "type": "loss", "content": 0.0004027434333693236, "timestamp": "2025-09-10 02:25:03.554960", "step": 4366, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:25:03.585808", "step": 4366, "epoch": 2 }, { "type": "loss", "content": 0.002170759718865156, "timestamp": "2025-09-10 02:25:03.593496", "step": 4367, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 144 ], "flops": 4271696270016 }, "timestamp": "2025-09-10 02:25:03.626118", "step": 4367, "epoch": 2 }, { "type": "loss", "content": 0.00022459625324700028, "timestamp": "2025-09-10 02:25:03.650096", "step": 4368, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:25:03.682870", "step": 4368, "epoch": 2 }, { "type": "loss", "content": 0.0006497269496321678, "timestamp": "2025-09-10 02:25:03.686874", "step": 4369, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:25:03.718865", "step": 4369, "epoch": 2 }, { "type": "loss", "content": 0.004197689704596996, "timestamp": "2025-09-10 02:25:03.725576", "step": 4370, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:25:03.757710", "step": 4370, "epoch": 2 }, { "type": "loss", "content": 0.021389422938227654, "timestamp": "2025-09-10 02:25:03.760473", "step": 4371, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:25:03.791399", "step": 4371, "epoch": 2 }, { "type": "loss", "content": 0.0005672698607668281, "timestamp": "2025-09-10 02:25:03.816643", "step": 4372, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:25:03.848327", "step": 4372, "epoch": 2 }, { "type": "loss", "content": 0.001765951863490045, "timestamp": "2025-09-10 02:25:03.853585", "step": 4373, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:25:03.884873", "step": 4373, "epoch": 2 }, { "type": "loss", "content": 0.00043063057819381356, "timestamp": "2025-09-10 02:25:03.889252", "step": 4374, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:25:03.921343", "step": 4374, "epoch": 2 }, { "type": "loss", "content": 0.00024490643409080803, "timestamp": "2025-09-10 02:25:03.929220", "step": 4375, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:25:03.960344", "step": 4375, "epoch": 2 }, { "type": "loss", "content": 0.0008961300482042134, "timestamp": "2025-09-10 02:25:03.985581", "step": 4376, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:25:04.017214", "step": 4376, "epoch": 2 }, { "type": "loss", "content": 0.05845966935157776, "timestamp": "2025-09-10 02:25:04.019595", "step": 4377, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:25:04.050407", "step": 4377, "epoch": 2 }, { "type": "loss", "content": 0.025336632505059242, "timestamp": "2025-09-10 02:25:04.061385", "step": 4378, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:25:04.100938", "step": 4378, "epoch": 2 }, { "type": "loss", "content": 0.010996916331350803, "timestamp": "2025-09-10 02:25:04.114689", "step": 4379, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:25:04.149851", "step": 4379, "epoch": 2 }, { "type": "loss", "content": 0.00864367000758648, "timestamp": "2025-09-10 02:25:04.184450", "step": 4380, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:25:04.215617", "step": 4380, "epoch": 2 }, { "type": "loss", "content": 9.313374903285876e-05, "timestamp": "2025-09-10 02:25:04.220604", "step": 4381, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:25:04.254668", "step": 4381, "epoch": 2 }, { "type": "loss", "content": 0.00039597839349880815, "timestamp": "2025-09-10 02:25:04.268392", "step": 4382, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:25:04.308879", "step": 4382, "epoch": 2 }, { "type": "loss", "content": 0.0007557374192401767, "timestamp": "2025-09-10 02:25:04.316489", "step": 4383, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:25:04.356029", "step": 4383, "epoch": 2 }, { "type": "loss", "content": 0.00034854214754886925, "timestamp": "2025-09-10 02:25:04.384274", "step": 4384, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:25:04.416262", "step": 4384, "epoch": 2 }, { "type": "loss", "content": 0.0001689386263024062, "timestamp": "2025-09-10 02:25:04.420840", "step": 4385, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:25:04.451403", "step": 4385, "epoch": 2 }, { "type": "loss", "content": 0.00041699831490404904, "timestamp": "2025-09-10 02:25:04.455584", "step": 4386, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:25:04.486246", "step": 4386, "epoch": 2 }, { "type": "loss", "content": 0.07199867814779282, "timestamp": "2025-09-10 02:25:04.489002", "step": 4387, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:25:04.520589", "step": 4387, "epoch": 2 }, { "type": "loss", "content": 0.04156893119215965, "timestamp": "2025-09-10 02:25:04.549335", "step": 4388, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:25:04.581471", "step": 4388, "epoch": 2 }, { "type": "loss", "content": 0.0010044872760772705, "timestamp": "2025-09-10 02:25:04.594236", "step": 4389, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:25:04.627227", "step": 4389, "epoch": 2 }, { "type": "loss", "content": 0.009525059722363949, "timestamp": "2025-09-10 02:25:04.634148", "step": 4390, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:25:04.666518", "step": 4390, "epoch": 2 }, { "type": "loss", "content": 0.010987967252731323, "timestamp": "2025-09-10 02:25:04.677857", "step": 4391, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:25:04.710710", "step": 4391, "epoch": 2 }, { "type": "loss", "content": 0.00037763340515084565, "timestamp": "2025-09-10 02:25:04.738121", "step": 4392, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 528 ], "flops": 15662185694400 }, "timestamp": "2025-09-10 02:25:04.782454", "step": 4392, "epoch": 2 }, { "type": "loss", "content": 0.009227042086422443, "timestamp": "2025-09-10 02:25:04.801406", "step": 4393, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:25:04.836946", "step": 4393, "epoch": 2 }, { "type": "loss", "content": 0.0056563569232821465, "timestamp": "2025-09-10 02:25:04.843328", "step": 4394, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:25:04.878055", "step": 4394, "epoch": 2 }, { "type": "loss", "content": 0.002920966362580657, "timestamp": "2025-09-10 02:25:04.891381", "step": 4395, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:25:04.923937", "step": 4395, "epoch": 2 }, { "type": "loss", "content": 0.0005617746501229703, "timestamp": "2025-09-10 02:25:04.951134", "step": 4396, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:25:04.985495", "step": 4396, "epoch": 2 }, { "type": "loss", "content": 0.0088628139346838, "timestamp": "2025-09-10 02:25:04.988524", "step": 4397, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:25:05.022980", "step": 4397, "epoch": 2 }, { "type": "loss", "content": 0.002569663105532527, "timestamp": "2025-09-10 02:25:05.028624", "step": 4398, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-10 02:25:05.064404", "step": 4398, "epoch": 2 }, { "type": "loss", "content": 0.004381218459457159, "timestamp": "2025-09-10 02:25:05.078266", "step": 4399, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:25:05.110148", "step": 4399, "epoch": 2 }, { "type": "loss", "content": 0.00011449779412942007, "timestamp": "2025-09-10 02:25:05.137669", "step": 4400, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-10 02:25:05.175220", "step": 4400, "epoch": 2 }, { "type": "loss", "content": 0.0009381847339682281, "timestamp": "2025-09-10 02:25:05.190670", "step": 4401, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:25:05.225844", "step": 4401, "epoch": 2 }, { "type": "loss", "content": 0.003086991375312209, "timestamp": "2025-09-10 02:25:05.239188", "step": 4402, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:25:05.270382", "step": 4402, "epoch": 2 }, { "type": "loss", "content": 0.0008446700521744788, "timestamp": "2025-09-10 02:25:05.274812", "step": 4403, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:25:05.306126", "step": 4403, "epoch": 2 }, { "type": "loss", "content": 0.012834797613322735, "timestamp": "2025-09-10 02:25:05.331405", "step": 4404, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:25:05.362930", "step": 4404, "epoch": 2 }, { "type": "loss", "content": 0.002319957595318556, "timestamp": "2025-09-10 02:25:05.370483", "step": 4405, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:25:05.402303", "step": 4405, "epoch": 2 }, { "type": "loss", "content": 0.021244987845420837, "timestamp": "2025-09-10 02:25:05.406316", "step": 4406, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:25:05.438704", "step": 4406, "epoch": 2 }, { "type": "loss", "content": 0.0019213539781048894, "timestamp": "2025-09-10 02:25:05.446034", "step": 4407, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:25:05.479814", "step": 4407, "epoch": 2 }, { "type": "loss", "content": 0.02890808694064617, "timestamp": "2025-09-10 02:25:05.514178", "step": 4408, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:25:05.546653", "step": 4408, "epoch": 2 }, { "type": "loss", "content": 0.007281397935003042, "timestamp": "2025-09-10 02:25:05.551380", "step": 4409, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:25:05.583266", "step": 4409, "epoch": 2 }, { "type": "loss", "content": 0.00019025967048946768, "timestamp": "2025-09-10 02:25:05.587500", "step": 4410, "epoch": 2 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 736 ], "batch_size": 8, "flops": 14554433988352 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 480 ], "batch_size": 8, "flops": 9492022189824 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 544 ], "batch_size": 8, "flops": 10757625139456 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 624 ], "batch_size": 8, "flops": 12339628826496 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 576 ], "batch_size": 8, "flops": 11390426614272 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 432 ], "batch_size": 8, "flops": 8542819977600 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 432 ], "batch_size": 8, "flops": 8542819977600 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 128 ], "batch_size": 8, "flops": 2531205966848 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 544 ], "batch_size": 8, "flops": 10757625139456 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 656 ], "batch_size": 8, "flops": 12972430301312 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 128 ], "batch_size": 8, "flops": 2531205966848 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 464 ], "batch_size": 8, "flops": 9175621452416 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 560 ], "batch_size": 8, "flops": 11074025876864 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 576 ], "batch_size": 8, "flops": 11390426614272 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 1168 ], "batch_size": 8, "flops": 23097253898368 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 640 ], "batch_size": 8, "flops": 12656029563904 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 6, 208 ], "batch_size": 8, "flops": 4113209653888 } ], "timestamp": "2025-09-10 02:25:15.850616", "step": 4410, "epoch": 2 }, { "type": "pplx", "content": 20370479.949023202, "timestamp": "2025-09-10 02:25:15.854937", "step": 4410, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:25:15.885941", "step": 4410, "epoch": 2 }, { "type": "loss", "content": 0.0011351705761626363, "timestamp": "2025-09-10 02:25:15.895670", "step": 4411, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:25:15.930248", "step": 4411, "epoch": 2 }, { "type": "loss", "content": 0.0018093172693625093, "timestamp": "2025-09-10 02:25:15.953841", "step": 4412, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:25:15.985315", "step": 4412, "epoch": 2 }, { "type": "loss", "content": 0.025083277374505997, "timestamp": "2025-09-10 02:25:15.989623", "step": 4413, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:25:16.019818", "step": 4413, "epoch": 2 }, { "type": "loss", "content": 0.0019721267744898796, "timestamp": "2025-09-10 02:25:16.032036", "step": 4414, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:25:16.061635", "step": 4414, "epoch": 2 }, { "type": "loss", "content": 0.0004431180714163929, "timestamp": "2025-09-10 02:25:16.071615", "step": 4415, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:25:16.102136", "step": 4415, "epoch": 2 }, { "type": "loss", "content": 0.006176200695335865, "timestamp": "2025-09-10 02:25:16.133398", "step": 4416, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:25:16.163904", "step": 4416, "epoch": 2 }, { "type": "loss", "content": 0.0014073234051465988, "timestamp": "2025-09-10 02:25:16.176495", "step": 4417, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:25:16.208014", "step": 4417, "epoch": 2 }, { "type": "loss", "content": 0.0057088471949100494, "timestamp": "2025-09-10 02:25:16.220069", "step": 4418, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:25:16.250957", "step": 4418, "epoch": 2 }, { "type": "loss", "content": 0.0012675122125074267, "timestamp": "2025-09-10 02:25:16.257909", "step": 4419, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:25:16.288247", "step": 4419, "epoch": 2 }, { "type": "loss", "content": 0.01437693927437067, "timestamp": "2025-09-10 02:25:16.316874", "step": 4420, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:25:16.347430", "step": 4420, "epoch": 2 }, { "type": "loss", "content": 0.0025128854904323816, "timestamp": "2025-09-10 02:25:16.352338", "step": 4421, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:25:16.382941", "step": 4421, "epoch": 2 }, { "type": "loss", "content": 0.003963653929531574, "timestamp": "2025-09-10 02:25:16.385672", "step": 4422, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:25:16.416172", "step": 4422, "epoch": 2 }, { "type": "loss", "content": 0.020399346947669983, "timestamp": "2025-09-10 02:25:16.423691", "step": 4423, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:25:16.453886", "step": 4423, "epoch": 2 }, { "type": "loss", "content": 0.03138017654418945, "timestamp": "2025-09-10 02:25:16.487035", "step": 4424, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:25:16.517208", "step": 4424, "epoch": 2 }, { "type": "loss", "content": 0.0019118200289085507, "timestamp": "2025-09-10 02:25:16.526815", "step": 4425, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:25:16.557895", "step": 4425, "epoch": 2 }, { "type": "loss", "content": 0.005137981381267309, "timestamp": "2025-09-10 02:25:16.565386", "step": 4426, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:25:16.596353", "step": 4426, "epoch": 2 }, { "type": "loss", "content": 0.001418622094206512, "timestamp": "2025-09-10 02:25:16.607299", "step": 4427, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:25:16.638091", "step": 4427, "epoch": 2 }, { "type": "loss", "content": 0.0003645730612333864, "timestamp": "2025-09-10 02:25:16.666574", "step": 4428, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:25:16.696492", "step": 4428, "epoch": 2 }, { "type": "loss", "content": 0.00549284229055047, "timestamp": "2025-09-10 02:25:16.701706", "step": 4429, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:25:16.732318", "step": 4429, "epoch": 2 }, { "type": "loss", "content": 0.04728490859270096, "timestamp": "2025-09-10 02:25:16.739376", "step": 4430, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:25:16.769497", "step": 4430, "epoch": 2 }, { "type": "loss", "content": 0.001036238856613636, "timestamp": "2025-09-10 02:25:16.781689", "step": 4431, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:25:16.816547", "step": 4431, "epoch": 2 }, { "type": "loss", "content": 0.009465152397751808, "timestamp": "2025-09-10 02:25:16.851174", "step": 4432, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 144 ], "flops": 4271696270016 }, "timestamp": "2025-09-10 02:25:16.881588", "step": 4432, "epoch": 2 }, { "type": "loss", "content": 0.012627379037439823, "timestamp": "2025-09-10 02:25:16.883722", "step": 4433, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 144 ], "flops": 4271696270016 }, "timestamp": "2025-09-10 02:25:16.913932", "step": 4433, "epoch": 2 }, { "type": "loss", "content": 0.00041100315866060555, "timestamp": "2025-09-10 02:25:16.916237", "step": 4434, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:25:16.945630", "step": 4434, "epoch": 2 }, { "type": "loss", "content": 0.0013999169459566474, "timestamp": "2025-09-10 02:25:16.953387", "step": 4435, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:25:16.983926", "step": 4435, "epoch": 2 }, { "type": "loss", "content": 0.02100582979619503, "timestamp": "2025-09-10 02:25:17.012694", "step": 4436, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 512 ], "flops": 15187581968384 }, "timestamp": "2025-09-10 02:25:17.053341", "step": 4436, "epoch": 2 }, { "type": "loss", "content": 0.001676683546975255, "timestamp": "2025-09-10 02:25:17.070677", "step": 4437, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:25:17.101443", "step": 4437, "epoch": 2 }, { "type": "loss", "content": 0.0023947085719555616, "timestamp": "2025-09-10 02:25:17.109357", "step": 4438, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:25:17.140639", "step": 4438, "epoch": 2 }, { "type": "loss", "content": 0.027537260204553604, "timestamp": "2025-09-10 02:25:17.147759", "step": 4439, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:25:17.177732", "step": 4439, "epoch": 2 }, { "type": "loss", "content": 0.008474222384393215, "timestamp": "2025-09-10 02:25:17.210732", "step": 4440, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:25:17.240867", "step": 4440, "epoch": 2 }, { "type": "loss", "content": 0.005611742846667767, "timestamp": "2025-09-10 02:25:17.243416", "step": 4441, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-10 02:25:17.281556", "step": 4441, "epoch": 2 }, { "type": "loss", "content": 0.0019225550349801779, "timestamp": "2025-09-10 02:25:17.297179", "step": 4442, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:25:17.329927", "step": 4442, "epoch": 2 }, { "type": "loss", "content": 0.001591675216332078, "timestamp": "2025-09-10 02:25:17.336903", "step": 4443, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:25:17.367487", "step": 4443, "epoch": 2 }, { "type": "loss", "content": 0.0011540406849235296, "timestamp": "2025-09-10 02:25:17.395419", "step": 4444, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-10 02:25:17.431151", "step": 4444, "epoch": 2 }, { "type": "loss", "content": 0.0016605369746685028, "timestamp": "2025-09-10 02:25:17.444438", "step": 4445, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:25:17.474779", "step": 4445, "epoch": 2 }, { "type": "loss", "content": 0.033832911401987076, "timestamp": "2025-09-10 02:25:17.477354", "step": 4446, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:25:17.507953", "step": 4446, "epoch": 2 }, { "type": "loss", "content": 0.02437257580459118, "timestamp": "2025-09-10 02:25:17.520474", "step": 4447, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-10 02:25:17.557930", "step": 4447, "epoch": 2 }, { "type": "loss", "content": 0.0006624148809351027, "timestamp": "2025-09-10 02:25:17.594424", "step": 4448, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:25:17.626626", "step": 4448, "epoch": 2 }, { "type": "loss", "content": 0.005153920501470566, "timestamp": "2025-09-10 02:25:17.628614", "step": 4449, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-10 02:25:17.666898", "step": 4449, "epoch": 2 }, { "type": "loss", "content": 0.025530895218253136, "timestamp": "2025-09-10 02:25:17.682503", "step": 4450, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:25:17.714804", "step": 4450, "epoch": 2 }, { "type": "loss", "content": 0.00903196632862091, "timestamp": "2025-09-10 02:25:17.722651", "step": 4451, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:25:17.756406", "step": 4451, "epoch": 2 }, { "type": "loss", "content": 0.018469911068677902, "timestamp": "2025-09-10 02:25:17.788252", "step": 4452, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:25:17.823443", "step": 4452, "epoch": 2 }, { "type": "loss", "content": 0.00648995628580451, "timestamp": "2025-09-10 02:25:17.828496", "step": 4453, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:25:17.859668", "step": 4453, "epoch": 2 }, { "type": "loss", "content": 0.0027806435246020555, "timestamp": "2025-09-10 02:25:17.869766", "step": 4454, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:25:17.899869", "step": 4454, "epoch": 2 }, { "type": "loss", "content": 0.0024515967816114426, "timestamp": "2025-09-10 02:25:17.906768", "step": 4455, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:25:17.937126", "step": 4455, "epoch": 2 }, { "type": "loss", "content": 0.0002551526122260839, "timestamp": "2025-09-10 02:25:17.969578", "step": 4456, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:25:17.999872", "step": 4456, "epoch": 2 }, { "type": "loss", "content": 0.003521733218804002, "timestamp": "2025-09-10 02:25:18.008432", "step": 4457, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:25:18.038618", "step": 4457, "epoch": 2 }, { "type": "loss", "content": 0.0038238188717514277, "timestamp": "2025-09-10 02:25:18.041246", "step": 4458, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:25:18.071557", "step": 4458, "epoch": 2 }, { "type": "loss", "content": 0.001157488557510078, "timestamp": "2025-09-10 02:25:18.076059", "step": 4459, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:25:18.106247", "step": 4459, "epoch": 2 }, { "type": "loss", "content": 0.006657246965914965, "timestamp": "2025-09-10 02:25:18.136622", "step": 4460, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:25:18.169556", "step": 4460, "epoch": 2 }, { "type": "loss", "content": 0.0043445127084851265, "timestamp": "2025-09-10 02:25:18.177946", "step": 4461, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:25:18.212263", "step": 4461, "epoch": 2 }, { "type": "loss", "content": 0.006325058173388243, "timestamp": "2025-09-10 02:25:18.225631", "step": 4462, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:25:18.255544", "step": 4462, "epoch": 2 }, { "type": "loss", "content": 0.031205790117383003, "timestamp": "2025-09-10 02:25:18.260132", "step": 4463, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:25:18.293387", "step": 4463, "epoch": 2 }, { "type": "loss", "content": 0.006542644929140806, "timestamp": "2025-09-10 02:25:18.327670", "step": 4464, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:25:18.358221", "step": 4464, "epoch": 2 }, { "type": "loss", "content": 0.0034815913531929255, "timestamp": "2025-09-10 02:25:18.360567", "step": 4465, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:25:18.393096", "step": 4465, "epoch": 2 }, { "type": "loss", "content": 0.0044549135491251945, "timestamp": "2025-09-10 02:25:18.403353", "step": 4466, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:25:18.435701", "step": 4466, "epoch": 2 }, { "type": "loss", "content": 0.007206571288406849, "timestamp": "2025-09-10 02:25:18.446597", "step": 4467, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:25:18.479836", "step": 4467, "epoch": 2 }, { "type": "loss", "content": 0.0019098568009212613, "timestamp": "2025-09-10 02:25:18.514083", "step": 4468, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:25:18.545584", "step": 4468, "epoch": 2 }, { "type": "loss", "content": 0.009369590319693089, "timestamp": "2025-09-10 02:25:18.553335", "step": 4469, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:25:18.585127", "step": 4469, "epoch": 2 }, { "type": "loss", "content": 0.004385852254927158, "timestamp": "2025-09-10 02:25:18.592851", "step": 4470, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:25:18.624213", "step": 4470, "epoch": 2 }, { "type": "loss", "content": 0.0033103374298661947, "timestamp": "2025-09-10 02:25:18.631120", "step": 4471, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:25:18.663428", "step": 4471, "epoch": 2 }, { "type": "loss", "content": 0.023561924695968628, "timestamp": "2025-09-10 02:25:18.691155", "step": 4472, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:25:18.722928", "step": 4472, "epoch": 2 }, { "type": "loss", "content": 0.012181418016552925, "timestamp": "2025-09-10 02:25:18.728189", "step": 4473, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:25:18.765187", "step": 4473, "epoch": 2 }, { "type": "loss", "content": 0.001010423176921904, "timestamp": "2025-09-10 02:25:18.772216", "step": 4474, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:25:18.805784", "step": 4474, "epoch": 2 }, { "type": "loss", "content": 0.013591425493359566, "timestamp": "2025-09-10 02:25:18.812254", "step": 4475, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 144 ], "flops": 4271696270016 }, "timestamp": "2025-09-10 02:25:18.849404", "step": 4475, "epoch": 2 }, { "type": "loss", "content": 0.007945683784782887, "timestamp": "2025-09-10 02:25:18.873116", "step": 4476, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:25:18.903332", "step": 4476, "epoch": 2 }, { "type": "loss", "content": 0.010526351630687714, "timestamp": "2025-09-10 02:25:18.913240", "step": 4477, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:25:18.950587", "step": 4477, "epoch": 2 }, { "type": "loss", "content": 0.0060272216796875, "timestamp": "2025-09-10 02:25:18.954742", "step": 4478, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:25:18.987285", "step": 4478, "epoch": 2 }, { "type": "loss", "content": 0.010436930693686008, "timestamp": "2025-09-10 02:25:18.994660", "step": 4479, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:25:19.025145", "step": 4479, "epoch": 2 }, { "type": "loss", "content": 0.002587387105450034, "timestamp": "2025-09-10 02:25:19.056094", "step": 4480, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:25:19.086029", "step": 4480, "epoch": 2 }, { "type": "loss", "content": 0.002534937346354127, "timestamp": "2025-09-10 02:25:19.090628", "step": 4481, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:25:19.123598", "step": 4481, "epoch": 2 }, { "type": "loss", "content": 0.03924372047185898, "timestamp": "2025-09-10 02:25:19.134548", "step": 4482, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:25:19.166117", "step": 4482, "epoch": 2 }, { "type": "loss", "content": 0.0028582927770912647, "timestamp": "2025-09-10 02:25:19.173122", "step": 4483, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:25:19.205766", "step": 4483, "epoch": 2 }, { "type": "loss", "content": 0.0013124326942488551, "timestamp": "2025-09-10 02:25:19.231252", "step": 4484, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:25:19.262019", "step": 4484, "epoch": 2 }, { "type": "loss", "content": 0.000659207405988127, "timestamp": "2025-09-10 02:25:19.269985", "step": 4485, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:25:19.299946", "step": 4485, "epoch": 2 }, { "type": "loss", "content": 0.0035707519855350256, "timestamp": "2025-09-10 02:25:19.302674", "step": 4486, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:25:19.333939", "step": 4486, "epoch": 2 }, { "type": "loss", "content": 0.003890756983309984, "timestamp": "2025-09-10 02:25:19.338618", "step": 4487, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:25:19.367790", "step": 4487, "epoch": 2 }, { "type": "loss", "content": 0.003235712181776762, "timestamp": "2025-09-10 02:25:19.391384", "step": 4488, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:25:19.421271", "step": 4488, "epoch": 2 }, { "type": "loss", "content": 0.0025943939108401537, "timestamp": "2025-09-10 02:25:19.429195", "step": 4489, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:25:19.459709", "step": 4489, "epoch": 2 }, { "type": "loss", "content": 0.0013656103983521461, "timestamp": "2025-09-10 02:25:19.472263", "step": 4490, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:25:19.502937", "step": 4490, "epoch": 2 }, { "type": "loss", "content": 0.0037658896762877703, "timestamp": "2025-09-10 02:25:19.509624", "step": 4491, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:25:19.541567", "step": 4491, "epoch": 2 }, { "type": "loss", "content": 0.002117105294018984, "timestamp": "2025-09-10 02:25:19.574046", "step": 4492, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-10 02:25:19.609978", "step": 4492, "epoch": 2 }, { "type": "loss", "content": 0.0034039022866636515, "timestamp": "2025-09-10 02:25:19.625185", "step": 4493, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-10 02:25:19.660066", "step": 4493, "epoch": 2 }, { "type": "loss", "content": 0.007604501210153103, "timestamp": "2025-09-10 02:25:19.673916", "step": 4494, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:25:19.706338", "step": 4494, "epoch": 2 }, { "type": "loss", "content": 0.0011244564084336162, "timestamp": "2025-09-10 02:25:19.713059", "step": 4495, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:25:19.746550", "step": 4495, "epoch": 2 }, { "type": "loss", "content": 0.006800326984375715, "timestamp": "2025-09-10 02:25:19.780819", "step": 4496, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:25:19.813417", "step": 4496, "epoch": 2 }, { "type": "loss", "content": 0.01871911995112896, "timestamp": "2025-09-10 02:25:19.818464", "step": 4497, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:25:19.848580", "step": 4497, "epoch": 2 }, { "type": "loss", "content": 0.0017440064111724496, "timestamp": "2025-09-10 02:25:19.851210", "step": 4498, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:25:19.881445", "step": 4498, "epoch": 2 }, { "type": "loss", "content": 0.0011718474561348557, "timestamp": "2025-09-10 02:25:19.891679", "step": 4499, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:25:19.922466", "step": 4499, "epoch": 2 }, { "type": "loss", "content": 0.0032657107803970575, "timestamp": "2025-09-10 02:25:19.953441", "step": 4500, "epoch": 2 }, { "type": "info", "content": "Checkpoint saved at step 4500", "timestamp": "2025-09-10 02:25:24.557607", "step": 4500, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:25:24.600123", "step": 4500, "epoch": 2 }, { "type": "loss", "content": 0.0034066697116941214, "timestamp": "2025-09-10 02:25:24.610227", "step": 4501, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:25:24.646491", "step": 4501, "epoch": 2 }, { "type": "loss", "content": 0.002542842412367463, "timestamp": "2025-09-10 02:25:24.653568", "step": 4502, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:25:24.685272", "step": 4502, "epoch": 2 }, { "type": "loss", "content": 0.002150564454495907, "timestamp": "2025-09-10 02:25:24.697290", "step": 4503, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:25:24.729074", "step": 4503, "epoch": 2 }, { "type": "loss", "content": 0.001930785016156733, "timestamp": "2025-09-10 02:25:24.757013", "step": 4504, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:25:24.789288", "step": 4504, "epoch": 2 }, { "type": "loss", "content": 0.003784495871514082, "timestamp": "2025-09-10 02:25:24.793966", "step": 4505, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:25:24.826430", "step": 4505, "epoch": 2 }, { "type": "loss", "content": 0.010695603676140308, "timestamp": "2025-09-10 02:25:24.833409", "step": 4506, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 144 ], "flops": 4271696270016 }, "timestamp": "2025-09-10 02:25:24.868356", "step": 4506, "epoch": 2 }, { "type": "loss", "content": 0.0040979208424687386, "timestamp": "2025-09-10 02:25:24.870690", "step": 4507, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-10 02:25:24.905472", "step": 4507, "epoch": 2 }, { "type": "loss", "content": 0.002046718029305339, "timestamp": "2025-09-10 02:25:24.940320", "step": 4508, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:25:24.972380", "step": 4508, "epoch": 2 }, { "type": "loss", "content": 0.0030817301012575626, "timestamp": "2025-09-10 02:25:24.976751", "step": 4509, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:25:25.009031", "step": 4509, "epoch": 2 }, { "type": "loss", "content": 0.0035417492035776377, "timestamp": "2025-09-10 02:25:25.021505", "step": 4510, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:25:25.052846", "step": 4510, "epoch": 2 }, { "type": "loss", "content": 0.0019691623747348785, "timestamp": "2025-09-10 02:25:25.060224", "step": 4511, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:25:25.092805", "step": 4511, "epoch": 2 }, { "type": "loss", "content": 0.0045999689027667046, "timestamp": "2025-09-10 02:25:25.120632", "step": 4512, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:25:25.152028", "step": 4512, "epoch": 2 }, { "type": "loss", "content": 0.0011519378749653697, "timestamp": "2025-09-10 02:25:25.156253", "step": 4513, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-10 02:25:25.190703", "step": 4513, "epoch": 2 }, { "type": "loss", "content": 0.003534820629283786, "timestamp": "2025-09-10 02:25:25.204538", "step": 4514, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:25:25.235373", "step": 4514, "epoch": 2 }, { "type": "loss", "content": 0.0012388339964672923, "timestamp": "2025-09-10 02:25:25.242299", "step": 4515, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:25:25.273546", "step": 4515, "epoch": 2 }, { "type": "loss", "content": 0.004160807467997074, "timestamp": "2025-09-10 02:25:25.301250", "step": 4516, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:25:25.333171", "step": 4516, "epoch": 2 }, { "type": "loss", "content": 0.022341666743159294, "timestamp": "2025-09-10 02:25:25.337743", "step": 4517, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:25:25.370232", "step": 4517, "epoch": 2 }, { "type": "loss", "content": 0.0011338494950905442, "timestamp": "2025-09-10 02:25:25.381798", "step": 4518, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:25:25.414671", "step": 4518, "epoch": 2 }, { "type": "loss", "content": 0.015123574994504452, "timestamp": "2025-09-10 02:25:25.421016", "step": 4519, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:25:25.452284", "step": 4519, "epoch": 2 }, { "type": "loss", "content": 0.0006013158708810806, "timestamp": "2025-09-10 02:25:25.477041", "step": 4520, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-10 02:25:25.510533", "step": 4520, "epoch": 2 }, { "type": "loss", "content": 0.004607627633959055, "timestamp": "2025-09-10 02:25:25.523661", "step": 4521, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:25:25.555730", "step": 4521, "epoch": 2 }, { "type": "loss", "content": 0.0005228667287155986, "timestamp": "2025-09-10 02:25:25.563370", "step": 4522, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:25:25.594818", "step": 4522, "epoch": 2 }, { "type": "loss", "content": 0.0018962175818160176, "timestamp": "2025-09-10 02:25:25.604537", "step": 4523, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:25:25.635292", "step": 4523, "epoch": 2 }, { "type": "loss", "content": 0.0010046407114714384, "timestamp": "2025-09-10 02:25:25.666066", "step": 4524, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:25:25.698133", "step": 4524, "epoch": 2 }, { "type": "loss", "content": 0.0004124719125684351, "timestamp": "2025-09-10 02:25:25.705611", "step": 4525, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:25:25.737235", "step": 4525, "epoch": 2 }, { "type": "loss", "content": 0.00014472127077169716, "timestamp": "2025-09-10 02:25:25.741466", "step": 4526, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:25:25.772457", "step": 4526, "epoch": 2 }, { "type": "loss", "content": 0.0033601843751966953, "timestamp": "2025-09-10 02:25:25.778985", "step": 4527, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:25:25.811525", "step": 4527, "epoch": 2 }, { "type": "loss", "content": 0.00247010076418519, "timestamp": "2025-09-10 02:25:25.842144", "step": 4528, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:25:25.873629", "step": 4528, "epoch": 2 }, { "type": "loss", "content": 0.03199951723217964, "timestamp": "2025-09-10 02:25:25.877737", "step": 4529, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:25:25.911048", "step": 4529, "epoch": 2 }, { "type": "loss", "content": 0.004257894121110439, "timestamp": "2025-09-10 02:25:25.918120", "step": 4530, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:25:25.949885", "step": 4530, "epoch": 2 }, { "type": "loss", "content": 0.0010970250004902482, "timestamp": "2025-09-10 02:25:25.956494", "step": 4531, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:25:25.988518", "step": 4531, "epoch": 2 }, { "type": "loss", "content": 0.0010734976967796683, "timestamp": "2025-09-10 02:25:26.019911", "step": 4532, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:25:26.051703", "step": 4532, "epoch": 2 }, { "type": "loss", "content": 0.0022549789864569902, "timestamp": "2025-09-10 02:25:26.061138", "step": 4533, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:25:26.092505", "step": 4533, "epoch": 2 }, { "type": "loss", "content": 0.005263431929051876, "timestamp": "2025-09-10 02:25:26.095987", "step": 4534, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:25:26.127450", "step": 4534, "epoch": 2 }, { "type": "loss", "content": 0.00026051278109662235, "timestamp": "2025-09-10 02:25:26.130043", "step": 4535, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:25:26.168166", "step": 4535, "epoch": 2 }, { "type": "loss", "content": 0.0022305804304778576, "timestamp": "2025-09-10 02:25:26.191905", "step": 4536, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:25:26.224731", "step": 4536, "epoch": 2 }, { "type": "loss", "content": 0.0032375783193856478, "timestamp": "2025-09-10 02:25:26.237353", "step": 4537, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:25:26.268987", "step": 4537, "epoch": 2 }, { "type": "loss", "content": 0.0010004842188209295, "timestamp": "2025-09-10 02:25:26.275711", "step": 4538, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:25:26.308250", "step": 4538, "epoch": 2 }, { "type": "loss", "content": 0.000708259001839906, "timestamp": "2025-09-10 02:25:26.315660", "step": 4539, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:25:26.347929", "step": 4539, "epoch": 2 }, { "type": "loss", "content": 0.020569520071148872, "timestamp": "2025-09-10 02:25:26.372621", "step": 4540, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:25:26.405003", "step": 4540, "epoch": 2 }, { "type": "loss", "content": 0.009725110605359077, "timestamp": "2025-09-10 02:25:26.412294", "step": 4541, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:25:26.443431", "step": 4541, "epoch": 2 }, { "type": "loss", "content": 0.003039458068087697, "timestamp": "2025-09-10 02:25:26.447287", "step": 4542, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:25:26.478145", "step": 4542, "epoch": 2 }, { "type": "loss", "content": 0.0049249157309532166, "timestamp": "2025-09-10 02:25:26.482075", "step": 4543, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-10 02:25:26.517540", "step": 4543, "epoch": 2 }, { "type": "loss", "content": 0.0006834098021499813, "timestamp": "2025-09-10 02:25:26.552245", "step": 4544, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:25:26.583654", "step": 4544, "epoch": 2 }, { "type": "loss", "content": 0.00034481266629882157, "timestamp": "2025-09-10 02:25:26.587683", "step": 4545, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:25:26.629070", "step": 4545, "epoch": 2 }, { "type": "loss", "content": 0.006935402750968933, "timestamp": "2025-09-10 02:25:26.633268", "step": 4546, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:25:26.664858", "step": 4546, "epoch": 2 }, { "type": "loss", "content": 0.006434209179133177, "timestamp": "2025-09-10 02:25:26.672410", "step": 4547, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 464 ], "flops": 13763770790336 }, "timestamp": "2025-09-10 02:25:26.713922", "step": 4547, "epoch": 2 }, { "type": "loss", "content": 0.01216146256774664, "timestamp": "2025-09-10 02:25:26.751858", "step": 4548, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:25:26.783543", "step": 4548, "epoch": 2 }, { "type": "loss", "content": 0.008372723124921322, "timestamp": "2025-09-10 02:25:26.786885", "step": 4549, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:25:26.819839", "step": 4549, "epoch": 2 }, { "type": "loss", "content": 0.00022385262127500027, "timestamp": "2025-09-10 02:25:26.826651", "step": 4550, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:25:26.858276", "step": 4550, "epoch": 2 }, { "type": "loss", "content": 0.007002050522714853, "timestamp": "2025-09-10 02:25:26.870536", "step": 4551, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:25:26.903103", "step": 4551, "epoch": 2 }, { "type": "loss", "content": 0.0005396933993324637, "timestamp": "2025-09-10 02:25:26.931106", "step": 4552, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:25:26.962944", "step": 4552, "epoch": 2 }, { "type": "loss", "content": 0.0024629905819892883, "timestamp": "2025-09-10 02:25:26.970969", "step": 4553, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:25:27.002522", "step": 4553, "epoch": 2 }, { "type": "loss", "content": 0.00016223655256908387, "timestamp": "2025-09-10 02:25:27.006138", "step": 4554, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:25:27.037871", "step": 4554, "epoch": 2 }, { "type": "loss", "content": 0.0015956457937136292, "timestamp": "2025-09-10 02:25:27.040497", "step": 4555, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 144 ], "flops": 4271696270016 }, "timestamp": "2025-09-10 02:25:27.071104", "step": 4555, "epoch": 2 }, { "type": "loss", "content": 0.0005312658613547683, "timestamp": "2025-09-10 02:25:27.095029", "step": 4556, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:25:27.127004", "step": 4556, "epoch": 2 }, { "type": "loss", "content": 0.002805754542350769, "timestamp": "2025-09-10 02:25:27.129456", "step": 4557, "epoch": 2 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 736 ], "batch_size": 8, "flops": 14554433988352 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 480 ], "batch_size": 8, "flops": 9492022189824 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 544 ], "batch_size": 8, "flops": 10757625139456 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 624 ], "batch_size": 8, "flops": 12339628826496 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 576 ], "batch_size": 8, "flops": 11390426614272 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 432 ], "batch_size": 8, "flops": 8542819977600 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 432 ], "batch_size": 8, "flops": 8542819977600 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 128 ], "batch_size": 8, "flops": 2531205966848 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 544 ], "batch_size": 8, "flops": 10757625139456 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 656 ], "batch_size": 8, "flops": 12972430301312 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 128 ], "batch_size": 8, "flops": 2531205966848 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 464 ], "batch_size": 8, "flops": 9175621452416 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 560 ], "batch_size": 8, "flops": 11074025876864 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 576 ], "batch_size": 8, "flops": 11390426614272 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 1168 ], "batch_size": 8, "flops": 23097253898368 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 640 ], "batch_size": 8, "flops": 12656029563904 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 6, 208 ], "batch_size": 8, "flops": 4113209653888 } ], "timestamp": "2025-09-10 02:25:37.248941", "step": 4557, "epoch": 2 }, { "type": "pplx", "content": 22196068.675676793, "timestamp": "2025-09-10 02:25:37.251722", "step": 4557, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:25:37.281897", "step": 4557, "epoch": 2 }, { "type": "loss", "content": 0.0010733563685789704, "timestamp": "2025-09-10 02:25:37.285638", "step": 4558, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:25:37.317115", "step": 4558, "epoch": 2 }, { "type": "loss", "content": 5.339493145584129e-05, "timestamp": "2025-09-10 02:25:37.324842", "step": 4559, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:25:37.357123", "step": 4559, "epoch": 2 }, { "type": "loss", "content": 0.012812472879886627, "timestamp": "2025-09-10 02:25:37.384863", "step": 4560, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:25:37.415764", "step": 4560, "epoch": 2 }, { "type": "loss", "content": 0.008561398833990097, "timestamp": "2025-09-10 02:25:37.418114", "step": 4561, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:25:37.449223", "step": 4561, "epoch": 2 }, { "type": "loss", "content": 0.00012836763926316053, "timestamp": "2025-09-10 02:25:37.456292", "step": 4562, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:25:37.486863", "step": 4562, "epoch": 2 }, { "type": "loss", "content": 0.001862238277681172, "timestamp": "2025-09-10 02:25:37.499099", "step": 4563, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:25:37.529385", "step": 4563, "epoch": 2 }, { "type": "loss", "content": 0.0009596589952707291, "timestamp": "2025-09-10 02:25:37.557360", "step": 4564, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:25:37.588280", "step": 4564, "epoch": 2 }, { "type": "loss", "content": 0.0013999083312228322, "timestamp": "2025-09-10 02:25:37.596221", "step": 4565, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:25:37.627323", "step": 4565, "epoch": 2 }, { "type": "loss", "content": 0.0009914558613672853, "timestamp": "2025-09-10 02:25:37.634287", "step": 4566, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:25:37.664921", "step": 4566, "epoch": 2 }, { "type": "loss", "content": 0.002980300458148122, "timestamp": "2025-09-10 02:25:37.672667", "step": 4567, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:25:37.703443", "step": 4567, "epoch": 2 }, { "type": "loss", "content": 0.00020019218209199607, "timestamp": "2025-09-10 02:25:37.732258", "step": 4568, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:25:37.763055", "step": 4568, "epoch": 2 }, { "type": "loss", "content": 0.0008754459558986127, "timestamp": "2025-09-10 02:25:37.767499", "step": 4569, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:25:37.797578", "step": 4569, "epoch": 2 }, { "type": "loss", "content": 0.0008850363665260375, "timestamp": "2025-09-10 02:25:37.804781", "step": 4570, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:25:37.835457", "step": 4570, "epoch": 2 }, { "type": "loss", "content": 0.0006145837833173573, "timestamp": "2025-09-10 02:25:37.842706", "step": 4571, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:25:37.873094", "step": 4571, "epoch": 2 }, { "type": "loss", "content": 0.0006157992756925523, "timestamp": "2025-09-10 02:25:37.904941", "step": 4572, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:25:37.935999", "step": 4572, "epoch": 2 }, { "type": "loss", "content": 0.00951096136122942, "timestamp": "2025-09-10 02:25:37.940852", "step": 4573, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:25:37.972196", "step": 4573, "epoch": 2 }, { "type": "loss", "content": 0.034846000373363495, "timestamp": "2025-09-10 02:25:37.979654", "step": 4574, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:25:38.011724", "step": 4574, "epoch": 2 }, { "type": "loss", "content": 0.0007103482494130731, "timestamp": "2025-09-10 02:25:38.015941", "step": 4575, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:25:38.047396", "step": 4575, "epoch": 2 }, { "type": "loss", "content": 0.0004654258373193443, "timestamp": "2025-09-10 02:25:38.075283", "step": 4576, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:25:38.109652", "step": 4576, "epoch": 2 }, { "type": "loss", "content": 0.002614832716062665, "timestamp": "2025-09-10 02:25:38.118519", "step": 4577, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:25:38.150260", "step": 4577, "epoch": 2 }, { "type": "loss", "content": 0.017886726185679436, "timestamp": "2025-09-10 02:25:38.157172", "step": 4578, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:25:38.188687", "step": 4578, "epoch": 2 }, { "type": "loss", "content": 0.00023473672627005726, "timestamp": "2025-09-10 02:25:38.195723", "step": 4579, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:25:38.227708", "step": 4579, "epoch": 2 }, { "type": "loss", "content": 0.0004447103419806808, "timestamp": "2025-09-10 02:25:38.255265", "step": 4580, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:25:38.288925", "step": 4580, "epoch": 2 }, { "type": "loss", "content": 0.0046869367361068726, "timestamp": "2025-09-10 02:25:38.295757", "step": 4581, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:25:38.327961", "step": 4581, "epoch": 2 }, { "type": "loss", "content": 0.0004977317294105887, "timestamp": "2025-09-10 02:25:38.337638", "step": 4582, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:25:38.370544", "step": 4582, "epoch": 2 }, { "type": "loss", "content": 0.0006395676755346358, "timestamp": "2025-09-10 02:25:38.382421", "step": 4583, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:25:38.413797", "step": 4583, "epoch": 2 }, { "type": "loss", "content": 0.0017181943403556943, "timestamp": "2025-09-10 02:25:38.442673", "step": 4584, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:25:38.474975", "step": 4584, "epoch": 2 }, { "type": "loss", "content": 0.00017127768660429865, "timestamp": "2025-09-10 02:25:38.479489", "step": 4585, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 144 ], "flops": 4271696270016 }, "timestamp": "2025-09-10 02:25:38.510902", "step": 4585, "epoch": 2 }, { "type": "loss", "content": 0.0008789977291598916, "timestamp": "2025-09-10 02:25:38.514803", "step": 4586, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:25:38.546954", "step": 4586, "epoch": 2 }, { "type": "loss", "content": 0.000202530252863653, "timestamp": "2025-09-10 02:25:38.553713", "step": 4587, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 480 ], "flops": 14238374516352 }, "timestamp": "2025-09-10 02:25:38.595580", "step": 4587, "epoch": 2 }, { "type": "loss", "content": 0.02567419223487377, "timestamp": "2025-09-10 02:25:38.633726", "step": 4588, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:25:38.665670", "step": 4588, "epoch": 2 }, { "type": "loss", "content": 0.0034661719109863043, "timestamp": "2025-09-10 02:25:38.673159", "step": 4589, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:25:38.705050", "step": 4589, "epoch": 2 }, { "type": "loss", "content": 0.0012347496813163161, "timestamp": "2025-09-10 02:25:38.712603", "step": 4590, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:25:38.745491", "step": 4590, "epoch": 2 }, { "type": "loss", "content": 0.0001653311337577179, "timestamp": "2025-09-10 02:25:38.752873", "step": 4591, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:25:38.784280", "step": 4591, "epoch": 2 }, { "type": "loss", "content": 0.0006502936012111604, "timestamp": "2025-09-10 02:25:38.809196", "step": 4592, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-10 02:25:38.846360", "step": 4592, "epoch": 2 }, { "type": "loss", "content": 0.00027454280643723905, "timestamp": "2025-09-10 02:25:38.861692", "step": 4593, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:25:38.893021", "step": 4593, "epoch": 2 }, { "type": "loss", "content": 0.0001545843406347558, "timestamp": "2025-09-10 02:25:38.897115", "step": 4594, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:25:38.927715", "step": 4594, "epoch": 2 }, { "type": "loss", "content": 0.05455930903553963, "timestamp": "2025-09-10 02:25:38.932332", "step": 4595, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:25:38.964182", "step": 4595, "epoch": 2 }, { "type": "loss", "content": 0.00358793162740767, "timestamp": "2025-09-10 02:25:38.992494", "step": 4596, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:25:39.024115", "step": 4596, "epoch": 2 }, { "type": "loss", "content": 0.0006846991600468755, "timestamp": "2025-09-10 02:25:39.032247", "step": 4597, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:25:39.065109", "step": 4597, "epoch": 2 }, { "type": "loss", "content": 0.000762683164793998, "timestamp": "2025-09-10 02:25:39.072321", "step": 4598, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:25:39.105738", "step": 4598, "epoch": 2 }, { "type": "loss", "content": 0.0005580178694799542, "timestamp": "2025-09-10 02:25:39.119148", "step": 4599, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 144 ], "flops": 4271696270016 }, "timestamp": "2025-09-10 02:25:39.149868", "step": 4599, "epoch": 2 }, { "type": "loss", "content": 0.04828514903783798, "timestamp": "2025-09-10 02:25:39.174906", "step": 4600, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:25:39.212885", "step": 4600, "epoch": 2 }, { "type": "loss", "content": 0.0019080432830378413, "timestamp": "2025-09-10 02:25:39.217720", "step": 4601, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:25:39.249614", "step": 4601, "epoch": 2 }, { "type": "loss", "content": 0.00021091777307447046, "timestamp": "2025-09-10 02:25:39.256791", "step": 4602, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 496 ], "flops": 14712978242368 }, "timestamp": "2025-09-10 02:25:39.300909", "step": 4602, "epoch": 2 }, { "type": "loss", "content": 0.0009871380170807242, "timestamp": "2025-09-10 02:25:39.318473", "step": 4603, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:25:39.350082", "step": 4603, "epoch": 2 }, { "type": "loss", "content": 0.0004607336886692792, "timestamp": "2025-09-10 02:25:39.380362", "step": 4604, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:25:39.411906", "step": 4604, "epoch": 2 }, { "type": "loss", "content": 0.0006659630453214049, "timestamp": "2025-09-10 02:25:39.419587", "step": 4605, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:25:39.449968", "step": 4605, "epoch": 2 }, { "type": "loss", "content": 0.001328265992924571, "timestamp": "2025-09-10 02:25:39.456704", "step": 4606, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 144 ], "flops": 4271696270016 }, "timestamp": "2025-09-10 02:25:39.487495", "step": 4606, "epoch": 2 }, { "type": "loss", "content": 0.00017410985310561955, "timestamp": "2025-09-10 02:25:39.490210", "step": 4607, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:25:39.521017", "step": 4607, "epoch": 2 }, { "type": "loss", "content": 0.0004943975363858044, "timestamp": "2025-09-10 02:25:39.549176", "step": 4608, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-10 02:25:39.587044", "step": 4608, "epoch": 2 }, { "type": "loss", "content": 0.005744884721934795, "timestamp": "2025-09-10 02:25:39.602492", "step": 4609, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:25:39.634627", "step": 4609, "epoch": 2 }, { "type": "loss", "content": 0.0007691961363889277, "timestamp": "2025-09-10 02:25:39.645597", "step": 4610, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:25:39.676945", "step": 4610, "epoch": 2 }, { "type": "loss", "content": 0.00013593518815468997, "timestamp": "2025-09-10 02:25:39.684017", "step": 4611, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:25:39.715445", "step": 4611, "epoch": 2 }, { "type": "loss", "content": 0.0014134369557723403, "timestamp": "2025-09-10 02:25:39.743812", "step": 4612, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:25:39.775669", "step": 4612, "epoch": 2 }, { "type": "loss", "content": 0.0025131232105195522, "timestamp": "2025-09-10 02:25:39.781144", "step": 4613, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-10 02:25:39.815817", "step": 4613, "epoch": 2 }, { "type": "loss", "content": 0.0002813456521835178, "timestamp": "2025-09-10 02:25:39.829621", "step": 4614, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:25:39.860677", "step": 4614, "epoch": 2 }, { "type": "loss", "content": 0.003985443152487278, "timestamp": "2025-09-10 02:25:39.868027", "step": 4615, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:25:39.899801", "step": 4615, "epoch": 2 }, { "type": "loss", "content": 6.440089055104181e-05, "timestamp": "2025-09-10 02:25:39.928044", "step": 4616, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:25:39.958592", "step": 4616, "epoch": 2 }, { "type": "loss", "content": 0.004450418520718813, "timestamp": "2025-09-10 02:25:39.963542", "step": 4617, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:25:39.994245", "step": 4617, "epoch": 2 }, { "type": "loss", "content": 0.0024432761128991842, "timestamp": "2025-09-10 02:25:40.004305", "step": 4618, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:25:40.034812", "step": 4618, "epoch": 2 }, { "type": "loss", "content": 0.008134338073432446, "timestamp": "2025-09-10 02:25:40.038920", "step": 4619, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-10 02:25:40.076477", "step": 4619, "epoch": 2 }, { "type": "loss", "content": 0.0038712576497346163, "timestamp": "2025-09-10 02:25:40.113034", "step": 4620, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:25:40.143839", "step": 4620, "epoch": 2 }, { "type": "loss", "content": 0.00024131852842401713, "timestamp": "2025-09-10 02:25:40.148586", "step": 4621, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:25:40.179389", "step": 4621, "epoch": 2 }, { "type": "loss", "content": 0.0002276496816193685, "timestamp": "2025-09-10 02:25:40.190457", "step": 4622, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:25:40.227228", "step": 4622, "epoch": 2 }, { "type": "loss", "content": 0.00045795520418323576, "timestamp": "2025-09-10 02:25:40.240617", "step": 4623, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:25:40.272382", "step": 4623, "epoch": 2 }, { "type": "loss", "content": 0.0020052941981703043, "timestamp": "2025-09-10 02:25:40.305646", "step": 4624, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:25:40.336456", "step": 4624, "epoch": 2 }, { "type": "loss", "content": 0.0003010353248100728, "timestamp": "2025-09-10 02:25:40.341626", "step": 4625, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:25:40.372635", "step": 4625, "epoch": 2 }, { "type": "loss", "content": 0.0003616653848439455, "timestamp": "2025-09-10 02:25:40.384970", "step": 4626, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:25:40.415819", "step": 4626, "epoch": 2 }, { "type": "loss", "content": 0.0009788486640900373, "timestamp": "2025-09-10 02:25:40.422896", "step": 4627, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:25:40.456481", "step": 4627, "epoch": 2 }, { "type": "loss", "content": 0.0018540980527177453, "timestamp": "2025-09-10 02:25:40.490768", "step": 4628, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:25:40.522011", "step": 4628, "epoch": 2 }, { "type": "loss", "content": 0.00019449996761977673, "timestamp": "2025-09-10 02:25:40.532556", "step": 4629, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:25:40.563716", "step": 4629, "epoch": 2 }, { "type": "loss", "content": 0.001505575724877417, "timestamp": "2025-09-10 02:25:40.570742", "step": 4630, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 144 ], "flops": 4271696270016 }, "timestamp": "2025-09-10 02:25:40.602398", "step": 4630, "epoch": 2 }, { "type": "loss", "content": 0.00010367255163146183, "timestamp": "2025-09-10 02:25:40.604663", "step": 4631, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:25:40.634796", "step": 4631, "epoch": 2 }, { "type": "loss", "content": 0.00021751046006102115, "timestamp": "2025-09-10 02:25:40.658513", "step": 4632, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:25:40.689780", "step": 4632, "epoch": 2 }, { "type": "loss", "content": 0.0005528530455194414, "timestamp": "2025-09-10 02:25:40.694627", "step": 4633, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:25:40.725402", "step": 4633, "epoch": 2 }, { "type": "loss", "content": 0.0006717692012898624, "timestamp": "2025-09-10 02:25:40.732500", "step": 4634, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:25:40.763952", "step": 4634, "epoch": 2 }, { "type": "loss", "content": 0.0003762389242183417, "timestamp": "2025-09-10 02:25:40.771598", "step": 4635, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:25:40.803947", "step": 4635, "epoch": 2 }, { "type": "loss", "content": 0.0006608268013224006, "timestamp": "2025-09-10 02:25:40.832461", "step": 4636, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:25:40.864000", "step": 4636, "epoch": 2 }, { "type": "loss", "content": 8.623411849839613e-05, "timestamp": "2025-09-10 02:25:40.871941", "step": 4637, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:25:40.903954", "step": 4637, "epoch": 2 }, { "type": "loss", "content": 0.0006122788763605058, "timestamp": "2025-09-10 02:25:40.910802", "step": 4638, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-10 02:25:40.945948", "step": 4638, "epoch": 2 }, { "type": "loss", "content": 0.002174076158553362, "timestamp": "2025-09-10 02:25:40.959955", "step": 4639, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-10 02:25:40.995398", "step": 4639, "epoch": 2 }, { "type": "loss", "content": 0.0002304811787325889, "timestamp": "2025-09-10 02:25:41.030016", "step": 4640, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:25:41.060128", "step": 4640, "epoch": 2 }, { "type": "loss", "content": 7.722365262452513e-05, "timestamp": "2025-09-10 02:25:41.062407", "step": 4641, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-10 02:25:41.096808", "step": 4641, "epoch": 2 }, { "type": "loss", "content": 0.00041160904220305383, "timestamp": "2025-09-10 02:25:41.110676", "step": 4642, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:25:41.141746", "step": 4642, "epoch": 2 }, { "type": "loss", "content": 0.0018459666753187776, "timestamp": "2025-09-10 02:25:41.148706", "step": 4643, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:25:41.180226", "step": 4643, "epoch": 2 }, { "type": "loss", "content": 0.0011864164844155312, "timestamp": "2025-09-10 02:25:41.208859", "step": 4644, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:25:41.239694", "step": 4644, "epoch": 2 }, { "type": "loss", "content": 5.6243621656904e-05, "timestamp": "2025-09-10 02:25:41.241544", "step": 4645, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:25:41.272194", "step": 4645, "epoch": 2 }, { "type": "loss", "content": 0.054455097764730453, "timestamp": "2025-09-10 02:25:41.279208", "step": 4646, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:25:41.309894", "step": 4646, "epoch": 2 }, { "type": "loss", "content": 0.0007867555250413716, "timestamp": "2025-09-10 02:25:41.314022", "step": 4647, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:25:41.346859", "step": 4647, "epoch": 2 }, { "type": "loss", "content": 0.00012387036986183375, "timestamp": "2025-09-10 02:25:41.380311", "step": 4648, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:25:41.413604", "step": 4648, "epoch": 2 }, { "type": "loss", "content": 0.00019573597819544375, "timestamp": "2025-09-10 02:25:41.418594", "step": 4649, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:25:41.450652", "step": 4649, "epoch": 2 }, { "type": "loss", "content": 7.041559729259461e-05, "timestamp": "2025-09-10 02:25:41.458149", "step": 4650, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:25:41.490198", "step": 4650, "epoch": 2 }, { "type": "loss", "content": 0.02323429472744465, "timestamp": "2025-09-10 02:25:41.500100", "step": 4651, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:25:41.531296", "step": 4651, "epoch": 2 }, { "type": "loss", "content": 0.0013165498385205865, "timestamp": "2025-09-10 02:25:41.562358", "step": 4652, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:25:41.593334", "step": 4652, "epoch": 2 }, { "type": "loss", "content": 0.0002063662832370028, "timestamp": "2025-09-10 02:25:41.595699", "step": 4653, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 144 ], "flops": 4271696270016 }, "timestamp": "2025-09-10 02:25:41.625894", "step": 4653, "epoch": 2 }, { "type": "loss", "content": 0.011784272268414497, "timestamp": "2025-09-10 02:25:41.628252", "step": 4654, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:25:41.660002", "step": 4654, "epoch": 2 }, { "type": "loss", "content": 0.0023810744751244783, "timestamp": "2025-09-10 02:25:41.667569", "step": 4655, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:25:41.697803", "step": 4655, "epoch": 2 }, { "type": "loss", "content": 0.0012222749646753073, "timestamp": "2025-09-10 02:25:41.723222", "step": 4656, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:25:41.753995", "step": 4656, "epoch": 2 }, { "type": "loss", "content": 0.0006310855969786644, "timestamp": "2025-09-10 02:25:41.762630", "step": 4657, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:25:41.793181", "step": 4657, "epoch": 2 }, { "type": "loss", "content": 6.163497891975567e-05, "timestamp": "2025-09-10 02:25:41.797539", "step": 4658, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:25:41.829355", "step": 4658, "epoch": 2 }, { "type": "loss", "content": 0.00010095408651977777, "timestamp": "2025-09-10 02:25:41.833453", "step": 4659, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:25:41.865602", "step": 4659, "epoch": 2 }, { "type": "loss", "content": 0.00024290040892083198, "timestamp": "2025-09-10 02:25:41.897535", "step": 4660, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:25:41.929454", "step": 4660, "epoch": 2 }, { "type": "loss", "content": 0.0009246188565157354, "timestamp": "2025-09-10 02:25:41.942119", "step": 4661, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:25:41.972950", "step": 4661, "epoch": 2 }, { "type": "loss", "content": 0.0011847690911963582, "timestamp": "2025-09-10 02:25:41.980351", "step": 4662, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-10 02:25:42.019971", "step": 4662, "epoch": 2 }, { "type": "loss", "content": 0.0035353994462639093, "timestamp": "2025-09-10 02:25:42.035617", "step": 4663, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:25:42.066434", "step": 4663, "epoch": 2 }, { "type": "loss", "content": 0.0010591925820335746, "timestamp": "2025-09-10 02:25:42.095085", "step": 4664, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 480 ], "flops": 14238374516352 }, "timestamp": "2025-09-10 02:25:42.134536", "step": 4664, "epoch": 2 }, { "type": "loss", "content": 0.00923153292387724, "timestamp": "2025-09-10 02:25:42.151529", "step": 4665, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:25:42.182270", "step": 4665, "epoch": 2 }, { "type": "loss", "content": 0.000438479648437351, "timestamp": "2025-09-10 02:25:42.189395", "step": 4666, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:25:42.226759", "step": 4666, "epoch": 2 }, { "type": "loss", "content": 0.004933382850140333, "timestamp": "2025-09-10 02:25:42.231094", "step": 4667, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:25:42.262207", "step": 4667, "epoch": 2 }, { "type": "loss", "content": 0.018754737451672554, "timestamp": "2025-09-10 02:25:42.287551", "step": 4668, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:25:42.319647", "step": 4668, "epoch": 2 }, { "type": "loss", "content": 0.0001720808504614979, "timestamp": "2025-09-10 02:25:42.324427", "step": 4669, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:25:42.354995", "step": 4669, "epoch": 2 }, { "type": "loss", "content": 0.0011062477715313435, "timestamp": "2025-09-10 02:25:42.362087", "step": 4670, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:25:42.392755", "step": 4670, "epoch": 2 }, { "type": "loss", "content": 0.00019575886835809797, "timestamp": "2025-09-10 02:25:42.403589", "step": 4671, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:25:42.434530", "step": 4671, "epoch": 2 }, { "type": "loss", "content": 0.005415527615696192, "timestamp": "2025-09-10 02:25:42.465694", "step": 4672, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:25:42.497197", "step": 4672, "epoch": 2 }, { "type": "loss", "content": 0.0010951546719297767, "timestamp": "2025-09-10 02:25:42.501696", "step": 4673, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 128 ], "flops": 3797092544000 }, "timestamp": "2025-09-10 02:25:42.532796", "step": 4673, "epoch": 2 }, { "type": "loss", "content": 0.002216489752754569, "timestamp": "2025-09-10 02:25:42.535146", "step": 4674, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:25:42.566942", "step": 4674, "epoch": 2 }, { "type": "loss", "content": 0.0029746759682893753, "timestamp": "2025-09-10 02:25:42.571135", "step": 4675, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:25:42.603076", "step": 4675, "epoch": 2 }, { "type": "loss", "content": 0.0008459574310109019, "timestamp": "2025-09-10 02:25:42.636460", "step": 4676, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:25:42.667674", "step": 4676, "epoch": 2 }, { "type": "loss", "content": 0.005179825238883495, "timestamp": "2025-09-10 02:25:42.669969", "step": 4677, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:25:42.701536", "step": 4677, "epoch": 2 }, { "type": "loss", "content": 0.020527558401226997, "timestamp": "2025-09-10 02:25:42.708350", "step": 4678, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:25:42.743444", "step": 4678, "epoch": 2 }, { "type": "loss", "content": 0.0006391909555532038, "timestamp": "2025-09-10 02:25:42.757207", "step": 4679, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:25:42.791612", "step": 4679, "epoch": 2 }, { "type": "loss", "content": 0.00017153627413790673, "timestamp": "2025-09-10 02:25:42.826189", "step": 4680, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:25:42.857263", "step": 4680, "epoch": 2 }, { "type": "loss", "content": 6.232234591152519e-05, "timestamp": "2025-09-10 02:25:42.862050", "step": 4681, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:25:42.893169", "step": 4681, "epoch": 2 }, { "type": "loss", "content": 0.0010045451344922185, "timestamp": "2025-09-10 02:25:42.904132", "step": 4682, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:25:42.934949", "step": 4682, "epoch": 2 }, { "type": "loss", "content": 0.0005457932711578906, "timestamp": "2025-09-10 02:25:42.945212", "step": 4683, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-10 02:25:42.980176", "step": 4683, "epoch": 2 }, { "type": "loss", "content": 0.0005815924378111959, "timestamp": "2025-09-10 02:25:43.015032", "step": 4684, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 464 ], "flops": 13763770790336 }, "timestamp": "2025-09-10 02:25:43.054138", "step": 4684, "epoch": 2 }, { "type": "loss", "content": 0.005409900564700365, "timestamp": "2025-09-10 02:25:43.070846", "step": 4685, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:25:43.102700", "step": 4685, "epoch": 2 }, { "type": "loss", "content": 0.00027891225181519985, "timestamp": "2025-09-10 02:25:43.107089", "step": 4686, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:25:43.137992", "step": 4686, "epoch": 2 }, { "type": "loss", "content": 0.00238403445109725, "timestamp": "2025-09-10 02:25:43.142416", "step": 4687, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:25:43.175928", "step": 4687, "epoch": 2 }, { "type": "loss", "content": 0.0003628423437476158, "timestamp": "2025-09-10 02:25:43.210185", "step": 4688, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:25:43.240934", "step": 4688, "epoch": 2 }, { "type": "loss", "content": 0.005772776901721954, "timestamp": "2025-09-10 02:25:43.248672", "step": 4689, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:25:43.279895", "step": 4689, "epoch": 2 }, { "type": "loss", "content": 5.581247023656033e-05, "timestamp": "2025-09-10 02:25:43.287182", "step": 4690, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:25:43.318782", "step": 4690, "epoch": 2 }, { "type": "loss", "content": 0.001327107078395784, "timestamp": "2025-09-10 02:25:43.331000", "step": 4691, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:25:43.361819", "step": 4691, "epoch": 2 }, { "type": "loss", "content": 0.039219219237565994, "timestamp": "2025-09-10 02:25:43.386848", "step": 4692, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:25:43.420335", "step": 4692, "epoch": 2 }, { "type": "loss", "content": 8.427041029790416e-05, "timestamp": "2025-09-10 02:25:43.425397", "step": 4693, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:25:43.456992", "step": 4693, "epoch": 2 }, { "type": "loss", "content": 0.00012090602103853598, "timestamp": "2025-09-10 02:25:43.463920", "step": 4694, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 464 ], "flops": 13763770790336 }, "timestamp": "2025-09-10 02:25:43.512243", "step": 4694, "epoch": 2 }, { "type": "loss", "content": 0.0006930717499926686, "timestamp": "2025-09-10 02:25:43.529330", "step": 4695, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:25:43.566468", "step": 4695, "epoch": 2 }, { "type": "loss", "content": 0.0009244754328392446, "timestamp": "2025-09-10 02:25:43.594568", "step": 4696, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:25:43.626716", "step": 4696, "epoch": 2 }, { "type": "loss", "content": 0.005284165497869253, "timestamp": "2025-09-10 02:25:43.631652", "step": 4697, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:25:43.665532", "step": 4697, "epoch": 2 }, { "type": "loss", "content": 0.0008305375231429935, "timestamp": "2025-09-10 02:25:43.678870", "step": 4698, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 448 ], "flops": 13289167064320 }, "timestamp": "2025-09-10 02:25:43.719171", "step": 4698, "epoch": 2 }, { "type": "loss", "content": 0.0010617909720167518, "timestamp": "2025-09-10 02:25:43.735466", "step": 4699, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:25:43.768609", "step": 4699, "epoch": 2 }, { "type": "loss", "content": 0.0003633351589087397, "timestamp": "2025-09-10 02:25:43.797140", "step": 4700, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:25:43.827485", "step": 4700, "epoch": 2 }, { "type": "loss", "content": 0.001146303373388946, "timestamp": "2025-09-10 02:25:43.833032", "step": 4701, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:25:43.864017", "step": 4701, "epoch": 2 }, { "type": "loss", "content": 0.0001229366025654599, "timestamp": "2025-09-10 02:25:43.871844", "step": 4702, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:25:43.903234", "step": 4702, "epoch": 2 }, { "type": "loss", "content": 0.0005992205115035176, "timestamp": "2025-09-10 02:25:43.910729", "step": 4703, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:25:43.942758", "step": 4703, "epoch": 2 }, { "type": "loss", "content": 0.0020961128175258636, "timestamp": "2025-09-10 02:25:43.971239", "step": 4704, "epoch": 2 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 736 ], "batch_size": 8, "flops": 14554433988352 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 480 ], "batch_size": 8, "flops": 9492022189824 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 544 ], "batch_size": 8, "flops": 10757625139456 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 624 ], "batch_size": 8, "flops": 12339628826496 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 576 ], "batch_size": 8, "flops": 11390426614272 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 432 ], "batch_size": 8, "flops": 8542819977600 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 432 ], "batch_size": 8, "flops": 8542819977600 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 128 ], "batch_size": 8, "flops": 2531205966848 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 544 ], "batch_size": 8, "flops": 10757625139456 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 656 ], "batch_size": 8, "flops": 12972430301312 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 128 ], "batch_size": 8, "flops": 2531205966848 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 464 ], "batch_size": 8, "flops": 9175621452416 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 560 ], "batch_size": 8, "flops": 11074025876864 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 576 ], "batch_size": 8, "flops": 11390426614272 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 1168 ], "batch_size": 8, "flops": 23097253898368 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 640 ], "batch_size": 8, "flops": 12656029563904 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 6, 208 ], "batch_size": 8, "flops": 4113209653888 } ], "timestamp": "2025-09-10 02:25:54.264978", "step": 4704, "epoch": 2 }, { "type": "pplx", "content": 22252700.049582753, "timestamp": "2025-09-10 02:25:54.267701", "step": 4704, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:25:54.299493", "step": 4704, "epoch": 2 }, { "type": "loss", "content": 0.0002747270918916911, "timestamp": "2025-09-10 02:25:54.306002", "step": 4705, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:25:54.337028", "step": 4705, "epoch": 2 }, { "type": "loss", "content": 0.0001008029212243855, "timestamp": "2025-09-10 02:25:54.341004", "step": 4706, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:25:54.374810", "step": 4706, "epoch": 2 }, { "type": "loss", "content": 0.0037881359457969666, "timestamp": "2025-09-10 02:25:54.388470", "step": 4707, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:25:54.419023", "step": 4707, "epoch": 2 }, { "type": "loss", "content": 0.00021644457592628896, "timestamp": "2025-09-10 02:25:54.447372", "step": 4708, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 656 ], "flops": 19459015502528 }, "timestamp": "2025-09-10 02:25:54.500651", "step": 4708, "epoch": 2 }, { "type": "loss", "content": 0.0004264476883690804, "timestamp": "2025-09-10 02:25:54.524182", "step": 4709, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:25:54.555932", "step": 4709, "epoch": 2 }, { "type": "loss", "content": 0.00112416862975806, "timestamp": "2025-09-10 02:25:54.566184", "step": 4710, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:25:54.602033", "step": 4710, "epoch": 2 }, { "type": "loss", "content": 0.001086343778297305, "timestamp": "2025-09-10 02:25:54.609220", "step": 4711, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:25:54.641173", "step": 4711, "epoch": 2 }, { "type": "loss", "content": 0.0024282841477543116, "timestamp": "2025-09-10 02:25:54.666306", "step": 4712, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 528 ], "flops": 15662185694400 }, "timestamp": "2025-09-10 02:25:54.711375", "step": 4712, "epoch": 2 }, { "type": "loss", "content": 0.00020581232092808932, "timestamp": "2025-09-10 02:25:54.730406", "step": 4713, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 3, 224 ], "flops": 4983601869792 }, "timestamp": "2025-09-10 02:25:54.764147", "step": 4713, "epoch": 2 }, { "type": "loss", "content": 0.00017851527081802487, "timestamp": "2025-09-10 02:25:54.767859", "step": 4714, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:25:54.820878", "step": 4714, "epoch": 3 }, { "type": "loss", "content": 7.199771062005311e-05, "timestamp": "2025-09-10 02:25:54.825650", "step": 4715, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:25:54.860167", "step": 4715, "epoch": 3 }, { "type": "loss", "content": 0.0005822144448757172, "timestamp": "2025-09-10 02:25:54.888916", "step": 4716, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:25:54.925746", "step": 4716, "epoch": 3 }, { "type": "loss", "content": 0.0002994556853082031, "timestamp": "2025-09-10 02:25:54.930312", "step": 4717, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:25:54.966869", "step": 4717, "epoch": 3 }, { "type": "loss", "content": 6.113481504144147e-05, "timestamp": "2025-09-10 02:25:54.973429", "step": 4718, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:25:55.010593", "step": 4718, "epoch": 3 }, { "type": "loss", "content": 6.410970672732219e-05, "timestamp": "2025-09-10 02:25:55.017589", "step": 4719, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:25:55.049280", "step": 4719, "epoch": 3 }, { "type": "loss", "content": 0.0018373571801930666, "timestamp": "2025-09-10 02:25:55.076029", "step": 4720, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:25:55.108559", "step": 4720, "epoch": 3 }, { "type": "loss", "content": 0.005885870661586523, "timestamp": "2025-09-10 02:25:55.112996", "step": 4721, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:25:55.143782", "step": 4721, "epoch": 3 }, { "type": "loss", "content": 0.000682682148180902, "timestamp": "2025-09-10 02:25:55.151398", "step": 4722, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:25:55.187563", "step": 4722, "epoch": 3 }, { "type": "loss", "content": 0.0009401330025866628, "timestamp": "2025-09-10 02:25:55.197939", "step": 4723, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:25:55.228686", "step": 4723, "epoch": 3 }, { "type": "loss", "content": 9.497522114543244e-05, "timestamp": "2025-09-10 02:25:55.257550", "step": 4724, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:25:55.288450", "step": 4724, "epoch": 3 }, { "type": "loss", "content": 0.0003811214992310852, "timestamp": "2025-09-10 02:25:55.293082", "step": 4725, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:25:55.322824", "step": 4725, "epoch": 3 }, { "type": "loss", "content": 0.003699273569509387, "timestamp": "2025-09-10 02:25:55.329732", "step": 4726, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:25:55.360246", "step": 4726, "epoch": 3 }, { "type": "loss", "content": 0.0002697373856790364, "timestamp": "2025-09-10 02:25:55.364754", "step": 4727, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:25:55.398993", "step": 4727, "epoch": 3 }, { "type": "loss", "content": 0.0001845106016844511, "timestamp": "2025-09-10 02:25:55.433261", "step": 4728, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:25:55.465486", "step": 4728, "epoch": 3 }, { "type": "loss", "content": 0.0001434317382518202, "timestamp": "2025-09-10 02:25:55.467733", "step": 4729, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:25:55.499235", "step": 4729, "epoch": 3 }, { "type": "loss", "content": 8.866209100233391e-05, "timestamp": "2025-09-10 02:25:55.503347", "step": 4730, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:25:55.534093", "step": 4730, "epoch": 3 }, { "type": "loss", "content": 0.0001506084663560614, "timestamp": "2025-09-10 02:25:55.546333", "step": 4731, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:25:55.576315", "step": 4731, "epoch": 3 }, { "type": "loss", "content": 0.0013919537886977196, "timestamp": "2025-09-10 02:25:55.601714", "step": 4732, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:25:55.642361", "step": 4732, "epoch": 3 }, { "type": "loss", "content": 0.00031470126123167574, "timestamp": "2025-09-10 02:25:55.649630", "step": 4733, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:25:55.692765", "step": 4733, "epoch": 3 }, { "type": "loss", "content": 0.0006087294896133244, "timestamp": "2025-09-10 02:25:55.698471", "step": 4734, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:25:55.731178", "step": 4734, "epoch": 3 }, { "type": "loss", "content": 0.0029154361691325903, "timestamp": "2025-09-10 02:25:55.741490", "step": 4735, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:25:55.773923", "step": 4735, "epoch": 3 }, { "type": "loss", "content": 0.0005959026166237891, "timestamp": "2025-09-10 02:25:55.799731", "step": 4736, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:25:55.832859", "step": 4736, "epoch": 3 }, { "type": "loss", "content": 0.0007912717992439866, "timestamp": "2025-09-10 02:25:55.845448", "step": 4737, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:25:55.878382", "step": 4737, "epoch": 3 }, { "type": "loss", "content": 8.05290910648182e-05, "timestamp": "2025-09-10 02:25:55.885600", "step": 4738, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:25:55.916824", "step": 4738, "epoch": 3 }, { "type": "loss", "content": 0.0016422310145571828, "timestamp": "2025-09-10 02:25:55.923786", "step": 4739, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:25:55.957047", "step": 4739, "epoch": 3 }, { "type": "loss", "content": 0.00033386718132533133, "timestamp": "2025-09-10 02:25:55.984672", "step": 4740, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:25:56.018814", "step": 4740, "epoch": 3 }, { "type": "loss", "content": 0.0002576867409516126, "timestamp": "2025-09-10 02:25:56.023115", "step": 4741, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:25:56.055754", "step": 4741, "epoch": 3 }, { "type": "loss", "content": 0.0020760188344866037, "timestamp": "2025-09-10 02:25:56.058458", "step": 4742, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:25:56.090856", "step": 4742, "epoch": 3 }, { "type": "loss", "content": 0.004879082087427378, "timestamp": "2025-09-10 02:25:56.098101", "step": 4743, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:25:56.132776", "step": 4743, "epoch": 3 }, { "type": "loss", "content": 4.860662738792598e-05, "timestamp": "2025-09-10 02:25:56.167244", "step": 4744, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:25:56.200709", "step": 4744, "epoch": 3 }, { "type": "loss", "content": 0.0001819897734094411, "timestamp": "2025-09-10 02:25:56.204983", "step": 4745, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:25:56.236803", "step": 4745, "epoch": 3 }, { "type": "loss", "content": 0.00019539693312253803, "timestamp": "2025-09-10 02:25:56.243514", "step": 4746, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:25:56.286988", "step": 4746, "epoch": 3 }, { "type": "loss", "content": 0.0030586186330765486, "timestamp": "2025-09-10 02:25:56.291110", "step": 4747, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:25:56.322545", "step": 4747, "epoch": 3 }, { "type": "loss", "content": 0.00042248849058523774, "timestamp": "2025-09-10 02:25:56.351141", "step": 4748, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:25:56.382291", "step": 4748, "epoch": 3 }, { "type": "loss", "content": 0.0006272942409850657, "timestamp": "2025-09-10 02:25:56.389844", "step": 4749, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:25:56.421653", "step": 4749, "epoch": 3 }, { "type": "loss", "content": 0.00040742545388638973, "timestamp": "2025-09-10 02:25:56.425826", "step": 4750, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:25:56.458601", "step": 4750, "epoch": 3 }, { "type": "loss", "content": 0.00014489439490716904, "timestamp": "2025-09-10 02:25:56.465546", "step": 4751, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 144 ], "flops": 4271696270016 }, "timestamp": "2025-09-10 02:25:56.499456", "step": 4751, "epoch": 3 }, { "type": "loss", "content": 0.000770228507462889, "timestamp": "2025-09-10 02:25:56.523257", "step": 4752, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:25:56.555302", "step": 4752, "epoch": 3 }, { "type": "loss", "content": 0.00037505757063627243, "timestamp": "2025-09-10 02:25:56.557426", "step": 4753, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:25:56.587932", "step": 4753, "epoch": 3 }, { "type": "loss", "content": 0.00023457292991224676, "timestamp": "2025-09-10 02:25:56.595023", "step": 4754, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:25:56.626289", "step": 4754, "epoch": 3 }, { "type": "loss", "content": 0.00568475853651762, "timestamp": "2025-09-10 02:25:56.633673", "step": 4755, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:25:56.665423", "step": 4755, "epoch": 3 }, { "type": "loss", "content": 0.0008253042469732463, "timestamp": "2025-09-10 02:25:56.690687", "step": 4756, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:25:56.722401", "step": 4756, "epoch": 3 }, { "type": "loss", "content": 0.0006686433334834874, "timestamp": "2025-09-10 02:25:56.734993", "step": 4757, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:25:56.765610", "step": 4757, "epoch": 3 }, { "type": "loss", "content": 0.0004306059854570776, "timestamp": "2025-09-10 02:25:56.772714", "step": 4758, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:25:56.802645", "step": 4758, "epoch": 3 }, { "type": "loss", "content": 0.03919557109475136, "timestamp": "2025-09-10 02:25:56.806926", "step": 4759, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:25:56.837195", "step": 4759, "epoch": 3 }, { "type": "loss", "content": 0.0008901845430955291, "timestamp": "2025-09-10 02:25:56.865124", "step": 4760, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:25:56.899149", "step": 4760, "epoch": 3 }, { "type": "loss", "content": 0.00022610726591665298, "timestamp": "2025-09-10 02:25:56.906750", "step": 4761, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:25:56.940355", "step": 4761, "epoch": 3 }, { "type": "loss", "content": 6.875943654449657e-05, "timestamp": "2025-09-10 02:25:56.947266", "step": 4762, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:25:56.978084", "step": 4762, "epoch": 3 }, { "type": "loss", "content": 0.0004053361772093922, "timestamp": "2025-09-10 02:25:56.989159", "step": 4763, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:25:57.022371", "step": 4763, "epoch": 3 }, { "type": "loss", "content": 0.0025856548454612494, "timestamp": "2025-09-10 02:25:57.054174", "step": 4764, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:25:57.085349", "step": 4764, "epoch": 3 }, { "type": "loss", "content": 0.00020959861285518855, "timestamp": "2025-09-10 02:25:57.087517", "step": 4765, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 928 ], "flops": 27527278844800 }, "timestamp": "2025-09-10 02:25:57.164501", "step": 4765, "epoch": 3 }, { "type": "loss", "content": 0.000836056366097182, "timestamp": "2025-09-10 02:25:57.196102", "step": 4766, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:25:57.227009", "step": 4766, "epoch": 3 }, { "type": "loss", "content": 0.0007483740919269621, "timestamp": "2025-09-10 02:25:57.231213", "step": 4767, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:25:57.261606", "step": 4767, "epoch": 3 }, { "type": "loss", "content": 0.00014812864537816495, "timestamp": "2025-09-10 02:25:57.292657", "step": 4768, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:25:57.323573", "step": 4768, "epoch": 3 }, { "type": "loss", "content": 0.0001649035548325628, "timestamp": "2025-09-10 02:25:57.328618", "step": 4769, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-10 02:25:57.367308", "step": 4769, "epoch": 3 }, { "type": "loss", "content": 2.814439358189702e-05, "timestamp": "2025-09-10 02:25:57.382928", "step": 4770, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 144 ], "flops": 4271696270016 }, "timestamp": "2025-09-10 02:25:57.413769", "step": 4770, "epoch": 3 }, { "type": "loss", "content": 0.0018101210007444024, "timestamp": "2025-09-10 02:25:57.415992", "step": 4771, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:25:57.446752", "step": 4771, "epoch": 3 }, { "type": "loss", "content": 0.0005368964048102498, "timestamp": "2025-09-10 02:25:57.474370", "step": 4772, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:25:57.504468", "step": 4772, "epoch": 3 }, { "type": "loss", "content": 0.0005379213253036141, "timestamp": "2025-09-10 02:25:57.512288", "step": 4773, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:25:57.543048", "step": 4773, "epoch": 3 }, { "type": "loss", "content": 0.0007029054104350507, "timestamp": "2025-09-10 02:25:57.550992", "step": 4774, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:25:57.583074", "step": 4774, "epoch": 3 }, { "type": "loss", "content": 0.017190443351864815, "timestamp": "2025-09-10 02:25:57.595255", "step": 4775, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:25:57.627841", "step": 4775, "epoch": 3 }, { "type": "loss", "content": 0.0006791255436837673, "timestamp": "2025-09-10 02:25:57.655695", "step": 4776, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:25:57.688970", "step": 4776, "epoch": 3 }, { "type": "loss", "content": 4.802838520845398e-05, "timestamp": "2025-09-10 02:25:57.694277", "step": 4777, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:25:57.726991", "step": 4777, "epoch": 3 }, { "type": "loss", "content": 0.0004995018825866282, "timestamp": "2025-09-10 02:25:57.734016", "step": 4778, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-10 02:25:57.774778", "step": 4778, "epoch": 3 }, { "type": "loss", "content": 3.5099070373689756e-05, "timestamp": "2025-09-10 02:25:57.790648", "step": 4779, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:25:57.822302", "step": 4779, "epoch": 3 }, { "type": "loss", "content": 0.00015079900913406163, "timestamp": "2025-09-10 02:25:57.850184", "step": 4780, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:25:57.880239", "step": 4780, "epoch": 3 }, { "type": "loss", "content": 0.0010758963180705905, "timestamp": "2025-09-10 02:25:57.882597", "step": 4781, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:25:57.912637", "step": 4781, "epoch": 3 }, { "type": "loss", "content": 0.022532150149345398, "timestamp": "2025-09-10 02:25:57.917341", "step": 4782, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:25:57.949411", "step": 4782, "epoch": 3 }, { "type": "loss", "content": 0.006792863365262747, "timestamp": "2025-09-10 02:25:57.957123", "step": 4783, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-10 02:25:57.996736", "step": 4783, "epoch": 3 }, { "type": "loss", "content": 0.00010572600876912475, "timestamp": "2025-09-10 02:25:58.033523", "step": 4784, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:25:58.070175", "step": 4784, "epoch": 3 }, { "type": "loss", "content": 0.00010956094047287479, "timestamp": "2025-09-10 02:25:58.075183", "step": 4785, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:25:58.106182", "step": 4785, "epoch": 3 }, { "type": "loss", "content": 0.000302365719107911, "timestamp": "2025-09-10 02:25:58.118711", "step": 4786, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:25:58.148933", "step": 4786, "epoch": 3 }, { "type": "loss", "content": 0.022172143682837486, "timestamp": "2025-09-10 02:25:58.156099", "step": 4787, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:25:58.188968", "step": 4787, "epoch": 3 }, { "type": "loss", "content": 9.383214637637138e-05, "timestamp": "2025-09-10 02:25:58.217333", "step": 4788, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:25:58.248652", "step": 4788, "epoch": 3 }, { "type": "loss", "content": 0.0019239891553297639, "timestamp": "2025-09-10 02:25:58.259436", "step": 4789, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-10 02:25:58.301360", "step": 4789, "epoch": 3 }, { "type": "loss", "content": 0.00011966370948357508, "timestamp": "2025-09-10 02:25:58.316977", "step": 4790, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:25:58.350131", "step": 4790, "epoch": 3 }, { "type": "loss", "content": 0.00045444341958500445, "timestamp": "2025-09-10 02:25:58.356818", "step": 4791, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:25:58.391166", "step": 4791, "epoch": 3 }, { "type": "loss", "content": 0.00012033795792376623, "timestamp": "2025-09-10 02:25:58.415950", "step": 4792, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:25:58.448459", "step": 4792, "epoch": 3 }, { "type": "loss", "content": 0.0009351377957500517, "timestamp": "2025-09-10 02:25:58.461022", "step": 4793, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-10 02:25:58.498355", "step": 4793, "epoch": 3 }, { "type": "loss", "content": 0.011824551038444042, "timestamp": "2025-09-10 02:25:58.513914", "step": 4794, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:25:58.544714", "step": 4794, "epoch": 3 }, { "type": "loss", "content": 0.004735906142741442, "timestamp": "2025-09-10 02:25:58.549049", "step": 4795, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:25:58.580216", "step": 4795, "epoch": 3 }, { "type": "loss", "content": 0.010355941019952297, "timestamp": "2025-09-10 02:25:58.613219", "step": 4796, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:25:58.653939", "step": 4796, "epoch": 3 }, { "type": "loss", "content": 0.03133935481309891, "timestamp": "2025-09-10 02:25:58.661346", "step": 4797, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:25:58.695774", "step": 4797, "epoch": 3 }, { "type": "loss", "content": 5.2887880883645266e-05, "timestamp": "2025-09-10 02:25:58.700167", "step": 4798, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:25:58.739637", "step": 4798, "epoch": 3 }, { "type": "loss", "content": 0.0006416105316020548, "timestamp": "2025-09-10 02:25:58.746381", "step": 4799, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:25:58.783249", "step": 4799, "epoch": 3 }, { "type": "loss", "content": 0.0007519474602304399, "timestamp": "2025-09-10 02:25:58.817805", "step": 4800, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:25:58.851422", "step": 4800, "epoch": 3 }, { "type": "loss", "content": 0.0013969124993309379, "timestamp": "2025-09-10 02:25:58.853678", "step": 4801, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:25:58.885301", "step": 4801, "epoch": 3 }, { "type": "loss", "content": 0.00015225332754198462, "timestamp": "2025-09-10 02:25:58.892596", "step": 4802, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 496 ], "flops": 14712978242368 }, "timestamp": "2025-09-10 02:25:58.935774", "step": 4802, "epoch": 3 }, { "type": "loss", "content": 0.002063736552372575, "timestamp": "2025-09-10 02:25:58.953398", "step": 4803, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:25:58.994738", "step": 4803, "epoch": 3 }, { "type": "loss", "content": 0.00044029252603650093, "timestamp": "2025-09-10 02:25:59.022422", "step": 4804, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:25:59.063004", "step": 4804, "epoch": 3 }, { "type": "loss", "content": 0.00024363842385355383, "timestamp": "2025-09-10 02:25:59.067300", "step": 4805, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:25:59.104097", "step": 4805, "epoch": 3 }, { "type": "loss", "content": 2.4000339180929586e-05, "timestamp": "2025-09-10 02:25:59.117432", "step": 4806, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:25:59.160602", "step": 4806, "epoch": 3 }, { "type": "loss", "content": 0.027750907465815544, "timestamp": "2025-09-10 02:25:59.165189", "step": 4807, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:25:59.198218", "step": 4807, "epoch": 3 }, { "type": "loss", "content": 7.746334449620917e-05, "timestamp": "2025-09-10 02:25:59.226048", "step": 4808, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 480 ], "flops": 14238374516352 }, "timestamp": "2025-09-10 02:25:59.267682", "step": 4808, "epoch": 3 }, { "type": "loss", "content": 0.004865474067628384, "timestamp": "2025-09-10 02:25:59.284691", "step": 4809, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:25:59.318826", "step": 4809, "epoch": 3 }, { "type": "loss", "content": 0.0006346892914734781, "timestamp": "2025-09-10 02:25:59.325602", "step": 4810, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:25:59.358416", "step": 4810, "epoch": 3 }, { "type": "loss", "content": 0.0009411797509528697, "timestamp": "2025-09-10 02:25:59.365575", "step": 4811, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:25:59.398191", "step": 4811, "epoch": 3 }, { "type": "loss", "content": 7.868631655583158e-05, "timestamp": "2025-09-10 02:25:59.423313", "step": 4812, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:25:59.454662", "step": 4812, "epoch": 3 }, { "type": "loss", "content": 0.00019831575627904385, "timestamp": "2025-09-10 02:25:59.457327", "step": 4813, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:25:59.493006", "step": 4813, "epoch": 3 }, { "type": "loss", "content": 0.005292465444654226, "timestamp": "2025-09-10 02:25:59.500034", "step": 4814, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:25:59.531461", "step": 4814, "epoch": 3 }, { "type": "loss", "content": 0.013571852818131447, "timestamp": "2025-09-10 02:25:59.534195", "step": 4815, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:25:59.566265", "step": 4815, "epoch": 3 }, { "type": "loss", "content": 0.0036411576438695192, "timestamp": "2025-09-10 02:25:59.594002", "step": 4816, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:25:59.628397", "step": 4816, "epoch": 3 }, { "type": "loss", "content": 0.03516482934355736, "timestamp": "2025-09-10 02:25:59.633370", "step": 4817, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:25:59.669479", "step": 4817, "epoch": 3 }, { "type": "loss", "content": 8.574579987907782e-05, "timestamp": "2025-09-10 02:25:59.681420", "step": 4818, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:25:59.714869", "step": 4818, "epoch": 3 }, { "type": "loss", "content": 4.725396502180956e-05, "timestamp": "2025-09-10 02:25:59.725620", "step": 4819, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:25:59.759943", "step": 4819, "epoch": 3 }, { "type": "loss", "content": 0.0006414660601876676, "timestamp": "2025-09-10 02:25:59.787921", "step": 4820, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:25:59.821105", "step": 4820, "epoch": 3 }, { "type": "loss", "content": 0.026372602209448814, "timestamp": "2025-09-10 02:25:59.825502", "step": 4821, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:25:59.859168", "step": 4821, "epoch": 3 }, { "type": "loss", "content": 0.004305239766836166, "timestamp": "2025-09-10 02:25:59.866853", "step": 4822, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:25:59.899931", "step": 4822, "epoch": 3 }, { "type": "loss", "content": 0.001554366433992982, "timestamp": "2025-09-10 02:25:59.909434", "step": 4823, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:25:59.943662", "step": 4823, "epoch": 3 }, { "type": "loss", "content": 1.843928112066351e-05, "timestamp": "2025-09-10 02:25:59.971429", "step": 4824, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:26:00.006017", "step": 4824, "epoch": 3 }, { "type": "loss", "content": 0.00037253022310324013, "timestamp": "2025-09-10 02:26:00.016360", "step": 4825, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:26:00.047393", "step": 4825, "epoch": 3 }, { "type": "loss", "content": 0.00015221700596157461, "timestamp": "2025-09-10 02:26:00.054706", "step": 4826, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:26:00.087117", "step": 4826, "epoch": 3 }, { "type": "loss", "content": 0.0029021056834608316, "timestamp": "2025-09-10 02:26:00.094410", "step": 4827, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:26:00.128880", "step": 4827, "epoch": 3 }, { "type": "loss", "content": 0.008609069511294365, "timestamp": "2025-09-10 02:26:00.156657", "step": 4828, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:26:00.190329", "step": 4828, "epoch": 3 }, { "type": "loss", "content": 0.01999555341899395, "timestamp": "2025-09-10 02:26:00.195215", "step": 4829, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:26:00.226485", "step": 4829, "epoch": 3 }, { "type": "loss", "content": 0.0010207198793068528, "timestamp": "2025-09-10 02:26:00.233313", "step": 4830, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:26:00.270031", "step": 4830, "epoch": 3 }, { "type": "loss", "content": 0.00045846131979487836, "timestamp": "2025-09-10 02:26:00.283364", "step": 4831, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:26:00.316424", "step": 4831, "epoch": 3 }, { "type": "loss", "content": 0.019452109932899475, "timestamp": "2025-09-10 02:26:00.344133", "step": 4832, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:26:00.379059", "step": 4832, "epoch": 3 }, { "type": "loss", "content": 0.028431635349988937, "timestamp": "2025-09-10 02:26:00.389300", "step": 4833, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:26:00.425448", "step": 4833, "epoch": 3 }, { "type": "loss", "content": 0.0021825884468853474, "timestamp": "2025-09-10 02:26:00.438782", "step": 4834, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:26:00.472698", "step": 4834, "epoch": 3 }, { "type": "loss", "content": 0.0035446875263005495, "timestamp": "2025-09-10 02:26:00.484427", "step": 4835, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:26:00.515445", "step": 4835, "epoch": 3 }, { "type": "loss", "content": 0.008904158137738705, "timestamp": "2025-09-10 02:26:00.546371", "step": 4836, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:26:00.577790", "step": 4836, "epoch": 3 }, { "type": "loss", "content": 0.025139760226011276, "timestamp": "2025-09-10 02:26:00.582495", "step": 4837, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:26:00.613669", "step": 4837, "epoch": 3 }, { "type": "loss", "content": 0.02474926970899105, "timestamp": "2025-09-10 02:26:00.624621", "step": 4838, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:26:00.655331", "step": 4838, "epoch": 3 }, { "type": "loss", "content": 0.005889459978789091, "timestamp": "2025-09-10 02:26:00.658335", "step": 4839, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:26:00.690023", "step": 4839, "epoch": 3 }, { "type": "loss", "content": 0.0004479756171349436, "timestamp": "2025-09-10 02:26:00.721142", "step": 4840, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-10 02:26:00.754422", "step": 4840, "epoch": 3 }, { "type": "loss", "content": 0.00183139240834862, "timestamp": "2025-09-10 02:26:00.767786", "step": 4841, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 144 ], "flops": 4271696270016 }, "timestamp": "2025-09-10 02:26:00.800060", "step": 4841, "epoch": 3 }, { "type": "loss", "content": 0.0007262559956870973, "timestamp": "2025-09-10 02:26:00.802493", "step": 4842, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:26:00.833909", "step": 4842, "epoch": 3 }, { "type": "loss", "content": 0.0002282061759615317, "timestamp": "2025-09-10 02:26:00.846515", "step": 4843, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 144 ], "flops": 4271696270016 }, "timestamp": "2025-09-10 02:26:00.876802", "step": 4843, "epoch": 3 }, { "type": "loss", "content": 0.00018490191723685712, "timestamp": "2025-09-10 02:26:00.900295", "step": 4844, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:26:00.930704", "step": 4844, "epoch": 3 }, { "type": "loss", "content": 0.037172622978687286, "timestamp": "2025-09-10 02:26:00.935978", "step": 4845, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:26:00.966702", "step": 4845, "epoch": 3 }, { "type": "loss", "content": 0.0007538548088632524, "timestamp": "2025-09-10 02:26:00.970742", "step": 4846, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:26:01.001708", "step": 4846, "epoch": 3 }, { "type": "loss", "content": 0.003542929422110319, "timestamp": "2025-09-10 02:26:01.009177", "step": 4847, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:26:01.039473", "step": 4847, "epoch": 3 }, { "type": "loss", "content": 0.0006386330351233482, "timestamp": "2025-09-10 02:26:01.068042", "step": 4848, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:26:01.098644", "step": 4848, "epoch": 3 }, { "type": "loss", "content": 0.0007097636116668582, "timestamp": "2025-09-10 02:26:01.108611", "step": 4849, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:26:01.139356", "step": 4849, "epoch": 3 }, { "type": "loss", "content": 0.0003923158801626414, "timestamp": "2025-09-10 02:26:01.146692", "step": 4850, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:26:01.176912", "step": 4850, "epoch": 3 }, { "type": "loss", "content": 0.001541634788736701, "timestamp": "2025-09-10 02:26:01.189033", "step": 4851, "epoch": 3 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 736 ], "batch_size": 8, "flops": 14554433988352 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 480 ], "batch_size": 8, "flops": 9492022189824 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 544 ], "batch_size": 8, "flops": 10757625139456 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 624 ], "batch_size": 8, "flops": 12339628826496 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 576 ], "batch_size": 8, "flops": 11390426614272 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 432 ], "batch_size": 8, "flops": 8542819977600 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 432 ], "batch_size": 8, "flops": 8542819977600 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 128 ], "batch_size": 8, "flops": 2531205966848 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 544 ], "batch_size": 8, "flops": 10757625139456 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 656 ], "batch_size": 8, "flops": 12972430301312 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 128 ], "batch_size": 8, "flops": 2531205966848 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 464 ], "batch_size": 8, "flops": 9175621452416 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 560 ], "batch_size": 8, "flops": 11074025876864 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 576 ], "batch_size": 8, "flops": 11390426614272 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 1168 ], "batch_size": 8, "flops": 23097253898368 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 640 ], "batch_size": 8, "flops": 12656029563904 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 6, 208 ], "batch_size": 8, "flops": 4113209653888 } ], "timestamp": "2025-09-10 02:26:11.251927", "step": 4851, "epoch": 3 }, { "type": "pplx", "content": 22527274.187912628, "timestamp": "2025-09-10 02:26:11.255094", "step": 4851, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:26:11.285937", "step": 4851, "epoch": 3 }, { "type": "loss", "content": 0.0015954956179484725, "timestamp": "2025-09-10 02:26:11.312678", "step": 4852, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:26:11.346481", "step": 4852, "epoch": 3 }, { "type": "loss", "content": 0.00044376106234267354, "timestamp": "2025-09-10 02:26:11.351218", "step": 4853, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:26:11.383233", "step": 4853, "epoch": 3 }, { "type": "loss", "content": 0.0005625728517770767, "timestamp": "2025-09-10 02:26:11.390990", "step": 4854, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:26:11.428386", "step": 4854, "epoch": 3 }, { "type": "loss", "content": 0.009019298478960991, "timestamp": "2025-09-10 02:26:11.442133", "step": 4855, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:26:11.473458", "step": 4855, "epoch": 3 }, { "type": "loss", "content": 0.00413868110626936, "timestamp": "2025-09-10 02:26:11.501704", "step": 4856, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:26:11.532532", "step": 4856, "epoch": 3 }, { "type": "loss", "content": 0.003734805155545473, "timestamp": "2025-09-10 02:26:11.537572", "step": 4857, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:26:11.568519", "step": 4857, "epoch": 3 }, { "type": "loss", "content": 0.00028699575341306627, "timestamp": "2025-09-10 02:26:11.576305", "step": 4858, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:26:11.607157", "step": 4858, "epoch": 3 }, { "type": "loss", "content": 0.00013391379616223276, "timestamp": "2025-09-10 02:26:11.617438", "step": 4859, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:26:11.649472", "step": 4859, "epoch": 3 }, { "type": "loss", "content": 0.008628031238913536, "timestamp": "2025-09-10 02:26:11.677213", "step": 4860, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:26:11.711931", "step": 4860, "epoch": 3 }, { "type": "loss", "content": 0.0020894031040370464, "timestamp": "2025-09-10 02:26:11.719440", "step": 4861, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:26:11.752557", "step": 4861, "epoch": 3 }, { "type": "loss", "content": 0.005519864149391651, "timestamp": "2025-09-10 02:26:11.760336", "step": 4862, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:26:11.792765", "step": 4862, "epoch": 3 }, { "type": "loss", "content": 0.005144777707755566, "timestamp": "2025-09-10 02:26:11.799769", "step": 4863, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:26:11.830246", "step": 4863, "epoch": 3 }, { "type": "loss", "content": 0.00043783331057056785, "timestamp": "2025-09-10 02:26:11.857871", "step": 4864, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-10 02:26:11.890550", "step": 4864, "epoch": 3 }, { "type": "loss", "content": 0.005862680729478598, "timestamp": "2025-09-10 02:26:11.903662", "step": 4865, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:26:11.934633", "step": 4865, "epoch": 3 }, { "type": "loss", "content": 0.007736521307379007, "timestamp": "2025-09-10 02:26:11.945036", "step": 4866, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 144 ], "flops": 4271696270016 }, "timestamp": "2025-09-10 02:26:11.975688", "step": 4866, "epoch": 3 }, { "type": "loss", "content": 0.004861840512603521, "timestamp": "2025-09-10 02:26:11.977952", "step": 4867, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:26:12.008274", "step": 4867, "epoch": 3 }, { "type": "loss", "content": 0.005645510274916887, "timestamp": "2025-09-10 02:26:12.036047", "step": 4868, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:26:12.067329", "step": 4868, "epoch": 3 }, { "type": "loss", "content": 0.002813952276483178, "timestamp": "2025-09-10 02:26:12.077850", "step": 4869, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:26:12.108352", "step": 4869, "epoch": 3 }, { "type": "loss", "content": 0.005273285787552595, "timestamp": "2025-09-10 02:26:12.112325", "step": 4870, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:26:12.143472", "step": 4870, "epoch": 3 }, { "type": "loss", "content": 0.008787405677139759, "timestamp": "2025-09-10 02:26:12.145799", "step": 4871, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:26:12.178570", "step": 4871, "epoch": 3 }, { "type": "loss", "content": 0.001541761914268136, "timestamp": "2025-09-10 02:26:12.210555", "step": 4872, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:26:12.245477", "step": 4872, "epoch": 3 }, { "type": "loss", "content": 0.011291869916021824, "timestamp": "2025-09-10 02:26:12.253776", "step": 4873, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:26:12.286054", "step": 4873, "epoch": 3 }, { "type": "loss", "content": 0.001998367952182889, "timestamp": "2025-09-10 02:26:12.293055", "step": 4874, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:26:12.329385", "step": 4874, "epoch": 3 }, { "type": "loss", "content": 0.005770617164671421, "timestamp": "2025-09-10 02:26:12.333776", "step": 4875, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:26:12.365533", "step": 4875, "epoch": 3 }, { "type": "loss", "content": 0.0012409423943609, "timestamp": "2025-09-10 02:26:12.390766", "step": 4876, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:26:12.424494", "step": 4876, "epoch": 3 }, { "type": "loss", "content": 0.001259890734218061, "timestamp": "2025-09-10 02:26:12.428830", "step": 4877, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:26:12.459636", "step": 4877, "epoch": 3 }, { "type": "loss", "content": 0.0013752224622294307, "timestamp": "2025-09-10 02:26:12.466402", "step": 4878, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:26:12.498281", "step": 4878, "epoch": 3 }, { "type": "loss", "content": 0.00954064168035984, "timestamp": "2025-09-10 02:26:12.504671", "step": 4879, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:26:12.538044", "step": 4879, "epoch": 3 }, { "type": "loss", "content": 0.0020668611396104097, "timestamp": "2025-09-10 02:26:12.568741", "step": 4880, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:26:12.602992", "step": 4880, "epoch": 3 }, { "type": "loss", "content": 0.00032076804200187325, "timestamp": "2025-09-10 02:26:12.612119", "step": 4881, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:26:12.649546", "step": 4881, "epoch": 3 }, { "type": "loss", "content": 0.0008431184687651694, "timestamp": "2025-09-10 02:26:12.663261", "step": 4882, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:26:12.705884", "step": 4882, "epoch": 3 }, { "type": "loss", "content": 0.004074872005730867, "timestamp": "2025-09-10 02:26:12.719281", "step": 4883, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:26:12.752046", "step": 4883, "epoch": 3 }, { "type": "loss", "content": 0.002831391990184784, "timestamp": "2025-09-10 02:26:12.776773", "step": 4884, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:26:12.808218", "step": 4884, "epoch": 3 }, { "type": "loss", "content": 0.004910766612738371, "timestamp": "2025-09-10 02:26:12.810889", "step": 4885, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:26:12.841938", "step": 4885, "epoch": 3 }, { "type": "loss", "content": 0.009595355950295925, "timestamp": "2025-09-10 02:26:12.854581", "step": 4886, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:26:12.888932", "step": 4886, "epoch": 3 }, { "type": "loss", "content": 0.013698582537472248, "timestamp": "2025-09-10 02:26:12.902772", "step": 4887, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:26:12.933454", "step": 4887, "epoch": 3 }, { "type": "loss", "content": 0.0003295644710306078, "timestamp": "2025-09-10 02:26:12.958026", "step": 4888, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:26:12.988869", "step": 4888, "epoch": 3 }, { "type": "loss", "content": 5.0092607125407085e-05, "timestamp": "2025-09-10 02:26:12.993445", "step": 4889, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:26:13.026601", "step": 4889, "epoch": 3 }, { "type": "loss", "content": 0.0013606924330815673, "timestamp": "2025-09-10 02:26:13.039119", "step": 4890, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:26:13.070766", "step": 4890, "epoch": 3 }, { "type": "loss", "content": 0.0027366813737899065, "timestamp": "2025-09-10 02:26:13.077822", "step": 4891, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 608 ], "flops": 18035204324480 }, "timestamp": "2025-09-10 02:26:13.131591", "step": 4891, "epoch": 3 }, { "type": "loss", "content": 0.004872309975326061, "timestamp": "2025-09-10 02:26:13.173871", "step": 4892, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:26:13.208531", "step": 4892, "epoch": 3 }, { "type": "loss", "content": 0.0004739653959404677, "timestamp": "2025-09-10 02:26:13.217311", "step": 4893, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:26:13.249819", "step": 4893, "epoch": 3 }, { "type": "loss", "content": 0.0016823039622977376, "timestamp": "2025-09-10 02:26:13.254188", "step": 4894, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:26:13.285267", "step": 4894, "epoch": 3 }, { "type": "loss", "content": 0.004189238417893648, "timestamp": "2025-09-10 02:26:13.289701", "step": 4895, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:26:13.334340", "step": 4895, "epoch": 3 }, { "type": "loss", "content": 0.0009055934497155249, "timestamp": "2025-09-10 02:26:13.369012", "step": 4896, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-10 02:26:13.403183", "step": 4896, "epoch": 3 }, { "type": "loss", "content": 0.0026929269079118967, "timestamp": "2025-09-10 02:26:13.416504", "step": 4897, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:26:13.447231", "step": 4897, "epoch": 3 }, { "type": "loss", "content": 0.0013669952750205994, "timestamp": "2025-09-10 02:26:13.451788", "step": 4898, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:26:13.483646", "step": 4898, "epoch": 3 }, { "type": "loss", "content": 0.0022419628221541643, "timestamp": "2025-09-10 02:26:13.490450", "step": 4899, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:26:13.523859", "step": 4899, "epoch": 3 }, { "type": "loss", "content": 0.0009813571814447641, "timestamp": "2025-09-10 02:26:13.551769", "step": 4900, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:26:13.583180", "step": 4900, "epoch": 3 }, { "type": "loss", "content": 0.0005111963837407529, "timestamp": "2025-09-10 02:26:13.586698", "step": 4901, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:26:13.617822", "step": 4901, "epoch": 3 }, { "type": "loss", "content": 0.018179837614297867, "timestamp": "2025-09-10 02:26:13.628203", "step": 4902, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:26:13.658289", "step": 4902, "epoch": 3 }, { "type": "loss", "content": 0.0024603954516351223, "timestamp": "2025-09-10 02:26:13.662869", "step": 4903, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:26:13.694489", "step": 4903, "epoch": 3 }, { "type": "loss", "content": 7.283476588781923e-05, "timestamp": "2025-09-10 02:26:13.723154", "step": 4904, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:26:13.753868", "step": 4904, "epoch": 3 }, { "type": "loss", "content": 0.0006378447869792581, "timestamp": "2025-09-10 02:26:13.758616", "step": 4905, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-10 02:26:13.793601", "step": 4905, "epoch": 3 }, { "type": "loss", "content": 0.005772311706095934, "timestamp": "2025-09-10 02:26:13.807551", "step": 4906, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:26:13.841772", "step": 4906, "epoch": 3 }, { "type": "loss", "content": 0.00017352063150610775, "timestamp": "2025-09-10 02:26:13.849465", "step": 4907, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:26:13.880186", "step": 4907, "epoch": 3 }, { "type": "loss", "content": 0.0007246700115501881, "timestamp": "2025-09-10 02:26:13.905590", "step": 4908, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:26:13.937751", "step": 4908, "epoch": 3 }, { "type": "loss", "content": 0.0003136695013381541, "timestamp": "2025-09-10 02:26:13.942598", "step": 4909, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:26:13.974113", "step": 4909, "epoch": 3 }, { "type": "loss", "content": 0.005124423187226057, "timestamp": "2025-09-10 02:26:13.982009", "step": 4910, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:26:14.013185", "step": 4910, "epoch": 3 }, { "type": "loss", "content": 0.0003622827643994242, "timestamp": "2025-09-10 02:26:14.023544", "step": 4911, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:26:14.054555", "step": 4911, "epoch": 3 }, { "type": "loss", "content": 0.01249981764703989, "timestamp": "2025-09-10 02:26:14.082638", "step": 4912, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:26:14.114302", "step": 4912, "epoch": 3 }, { "type": "loss", "content": 0.0059426832012832165, "timestamp": "2025-09-10 02:26:14.119050", "step": 4913, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:26:14.150381", "step": 4913, "epoch": 3 }, { "type": "loss", "content": 0.00011479367094580084, "timestamp": "2025-09-10 02:26:14.161544", "step": 4914, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:26:14.191668", "step": 4914, "epoch": 3 }, { "type": "loss", "content": 0.0021279591601341963, "timestamp": "2025-09-10 02:26:14.198639", "step": 4915, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:26:14.229407", "step": 4915, "epoch": 3 }, { "type": "loss", "content": 4.092241215403192e-05, "timestamp": "2025-09-10 02:26:14.257786", "step": 4916, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:26:14.288854", "step": 4916, "epoch": 3 }, { "type": "loss", "content": 0.00016833031259011477, "timestamp": "2025-09-10 02:26:14.293904", "step": 4917, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:26:14.327484", "step": 4917, "epoch": 3 }, { "type": "loss", "content": 0.000857060425914824, "timestamp": "2025-09-10 02:26:14.335234", "step": 4918, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:26:14.366296", "step": 4918, "epoch": 3 }, { "type": "loss", "content": 4.280987195670605e-05, "timestamp": "2025-09-10 02:26:14.373922", "step": 4919, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:26:14.404028", "step": 4919, "epoch": 3 }, { "type": "loss", "content": 0.001588566112332046, "timestamp": "2025-09-10 02:26:14.432402", "step": 4920, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:26:14.462073", "step": 4920, "epoch": 3 }, { "type": "loss", "content": 0.03219066932797432, "timestamp": "2025-09-10 02:26:14.466695", "step": 4921, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:26:14.497215", "step": 4921, "epoch": 3 }, { "type": "loss", "content": 0.00037080320180393755, "timestamp": "2025-09-10 02:26:14.509609", "step": 4922, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:26:14.539847", "step": 4922, "epoch": 3 }, { "type": "loss", "content": 0.00011398802598705515, "timestamp": "2025-09-10 02:26:14.547735", "step": 4923, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:26:14.578237", "step": 4923, "epoch": 3 }, { "type": "loss", "content": 0.00020103438873775303, "timestamp": "2025-09-10 02:26:14.603447", "step": 4924, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:26:14.635658", "step": 4924, "epoch": 3 }, { "type": "loss", "content": 0.0020608811173588037, "timestamp": "2025-09-10 02:26:14.640856", "step": 4925, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:26:14.671556", "step": 4925, "epoch": 3 }, { "type": "loss", "content": 6.329066673060879e-05, "timestamp": "2025-09-10 02:26:14.675957", "step": 4926, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:26:14.706195", "step": 4926, "epoch": 3 }, { "type": "loss", "content": 0.008835741318762302, "timestamp": "2025-09-10 02:26:14.710912", "step": 4927, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:26:14.741722", "step": 4927, "epoch": 3 }, { "type": "loss", "content": 0.00034493012935854495, "timestamp": "2025-09-10 02:26:14.766534", "step": 4928, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:26:14.796398", "step": 4928, "epoch": 3 }, { "type": "loss", "content": 0.003803166327998042, "timestamp": "2025-09-10 02:26:14.801450", "step": 4929, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:26:14.832599", "step": 4929, "epoch": 3 }, { "type": "loss", "content": 0.00011779103806475177, "timestamp": "2025-09-10 02:26:14.839344", "step": 4930, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:26:14.869905", "step": 4930, "epoch": 3 }, { "type": "loss", "content": 0.0015055211260914803, "timestamp": "2025-09-10 02:26:14.874259", "step": 4931, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:26:14.907594", "step": 4931, "epoch": 3 }, { "type": "loss", "content": 0.0029439402278512716, "timestamp": "2025-09-10 02:26:14.941862", "step": 4932, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:26:14.973346", "step": 4932, "epoch": 3 }, { "type": "loss", "content": 0.00018022792937699705, "timestamp": "2025-09-10 02:26:14.977958", "step": 4933, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:26:15.009235", "step": 4933, "epoch": 3 }, { "type": "loss", "content": 0.008141091093420982, "timestamp": "2025-09-10 02:26:15.021774", "step": 4934, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:26:15.055739", "step": 4934, "epoch": 3 }, { "type": "loss", "content": 0.00017168700287584215, "timestamp": "2025-09-10 02:26:15.069567", "step": 4935, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:26:15.099959", "step": 4935, "epoch": 3 }, { "type": "loss", "content": 9.493528341408819e-05, "timestamp": "2025-09-10 02:26:15.128009", "step": 4936, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:26:15.159409", "step": 4936, "epoch": 3 }, { "type": "loss", "content": 0.00017535120423417538, "timestamp": "2025-09-10 02:26:15.164334", "step": 4937, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:26:15.195047", "step": 4937, "epoch": 3 }, { "type": "loss", "content": 0.00033908261684700847, "timestamp": "2025-09-10 02:26:15.202033", "step": 4938, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 528 ], "flops": 15662185694400 }, "timestamp": "2025-09-10 02:26:15.248703", "step": 4938, "epoch": 3 }, { "type": "loss", "content": 0.00015323214756790549, "timestamp": "2025-09-10 02:26:15.267902", "step": 4939, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:26:15.301186", "step": 4939, "epoch": 3 }, { "type": "loss", "content": 0.0017902174731716514, "timestamp": "2025-09-10 02:26:15.333311", "step": 4940, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:26:15.363730", "step": 4940, "epoch": 3 }, { "type": "loss", "content": 0.00014680066669825464, "timestamp": "2025-09-10 02:26:15.372333", "step": 4941, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:26:15.402692", "step": 4941, "epoch": 3 }, { "type": "loss", "content": 0.00020298264280427247, "timestamp": "2025-09-10 02:26:15.407122", "step": 4942, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:26:15.440189", "step": 4942, "epoch": 3 }, { "type": "loss", "content": 0.00014298847236204892, "timestamp": "2025-09-10 02:26:15.453645", "step": 4943, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:26:15.485754", "step": 4943, "epoch": 3 }, { "type": "loss", "content": 0.00034119986230507493, "timestamp": "2025-09-10 02:26:15.519215", "step": 4944, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-10 02:26:15.554520", "step": 4944, "epoch": 3 }, { "type": "loss", "content": 0.00043467155774123967, "timestamp": "2025-09-10 02:26:15.569606", "step": 4945, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:26:15.600181", "step": 4945, "epoch": 3 }, { "type": "loss", "content": 0.00013701003626920283, "timestamp": "2025-09-10 02:26:15.610359", "step": 4946, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:26:15.640935", "step": 4946, "epoch": 3 }, { "type": "loss", "content": 0.013453126884996891, "timestamp": "2025-09-10 02:26:15.653151", "step": 4947, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:26:15.683338", "step": 4947, "epoch": 3 }, { "type": "loss", "content": 7.32469925424084e-05, "timestamp": "2025-09-10 02:26:15.716450", "step": 4948, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:26:15.746478", "step": 4948, "epoch": 3 }, { "type": "loss", "content": 0.06172531098127365, "timestamp": "2025-09-10 02:26:15.751195", "step": 4949, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:26:15.783122", "step": 4949, "epoch": 3 }, { "type": "loss", "content": 0.0008722272468730807, "timestamp": "2025-09-10 02:26:15.790214", "step": 4950, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:26:15.820155", "step": 4950, "epoch": 3 }, { "type": "loss", "content": 0.04367053508758545, "timestamp": "2025-09-10 02:26:15.827844", "step": 4951, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:26:15.857422", "step": 4951, "epoch": 3 }, { "type": "loss", "content": 0.0004010576813016087, "timestamp": "2025-09-10 02:26:15.885360", "step": 4952, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:26:15.915490", "step": 4952, "epoch": 3 }, { "type": "loss", "content": 3.4004107874352485e-05, "timestamp": "2025-09-10 02:26:15.920410", "step": 4953, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-10 02:26:15.957792", "step": 4953, "epoch": 3 }, { "type": "loss", "content": 0.007229152601212263, "timestamp": "2025-09-10 02:26:15.973402", "step": 4954, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:26:16.004631", "step": 4954, "epoch": 3 }, { "type": "loss", "content": 5.849377703270875e-05, "timestamp": "2025-09-10 02:26:16.012289", "step": 4955, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:26:16.042679", "step": 4955, "epoch": 3 }, { "type": "loss", "content": 0.004347801208496094, "timestamp": "2025-09-10 02:26:16.070650", "step": 4956, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:26:16.101178", "step": 4956, "epoch": 3 }, { "type": "loss", "content": 0.0001966599520528689, "timestamp": "2025-09-10 02:26:16.104169", "step": 4957, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 144 ], "flops": 4271696270016 }, "timestamp": "2025-09-10 02:26:16.135122", "step": 4957, "epoch": 3 }, { "type": "loss", "content": 0.00021397892851382494, "timestamp": "2025-09-10 02:26:16.137429", "step": 4958, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:26:16.168886", "step": 4958, "epoch": 3 }, { "type": "loss", "content": 0.00024242003564722836, "timestamp": "2025-09-10 02:26:16.176400", "step": 4959, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:26:16.206448", "step": 4959, "epoch": 3 }, { "type": "loss", "content": 0.0008051989716477692, "timestamp": "2025-09-10 02:26:16.234381", "step": 4960, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:26:16.265419", "step": 4960, "epoch": 3 }, { "type": "loss", "content": 0.0025039296597242355, "timestamp": "2025-09-10 02:26:16.270792", "step": 4961, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-10 02:26:16.308395", "step": 4961, "epoch": 3 }, { "type": "loss", "content": 7.02980105415918e-05, "timestamp": "2025-09-10 02:26:16.324261", "step": 4962, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:26:16.358251", "step": 4962, "epoch": 3 }, { "type": "loss", "content": 0.003547137137502432, "timestamp": "2025-09-10 02:26:16.365900", "step": 4963, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:26:16.399984", "step": 4963, "epoch": 3 }, { "type": "loss", "content": 8.306949894176796e-05, "timestamp": "2025-09-10 02:26:16.434207", "step": 4964, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:26:16.467886", "step": 4964, "epoch": 3 }, { "type": "loss", "content": 0.00013334887626115233, "timestamp": "2025-09-10 02:26:16.473130", "step": 4965, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:26:16.504496", "step": 4965, "epoch": 3 }, { "type": "loss", "content": 5.8079971495317295e-05, "timestamp": "2025-09-10 02:26:16.516848", "step": 4966, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:26:16.547233", "step": 4966, "epoch": 3 }, { "type": "loss", "content": 3.8230766222113743e-05, "timestamp": "2025-09-10 02:26:16.551728", "step": 4967, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:26:16.582260", "step": 4967, "epoch": 3 }, { "type": "loss", "content": 0.0009217527112923563, "timestamp": "2025-09-10 02:26:16.610964", "step": 4968, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:26:16.641445", "step": 4968, "epoch": 3 }, { "type": "loss", "content": 9.093651169678196e-05, "timestamp": "2025-09-10 02:26:16.646147", "step": 4969, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:26:16.677638", "step": 4969, "epoch": 3 }, { "type": "loss", "content": 0.0006383144063875079, "timestamp": "2025-09-10 02:26:16.685338", "step": 4970, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:26:16.716465", "step": 4970, "epoch": 3 }, { "type": "loss", "content": 0.00012200616765767336, "timestamp": "2025-09-10 02:26:16.724215", "step": 4971, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:26:16.756583", "step": 4971, "epoch": 3 }, { "type": "loss", "content": 0.00018401713168714195, "timestamp": "2025-09-10 02:26:16.784994", "step": 4972, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:26:16.815604", "step": 4972, "epoch": 3 }, { "type": "loss", "content": 3.1525421945843846e-05, "timestamp": "2025-09-10 02:26:16.817894", "step": 4973, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:26:16.849803", "step": 4973, "epoch": 3 }, { "type": "loss", "content": 0.00023603920999448746, "timestamp": "2025-09-10 02:26:16.857514", "step": 4974, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:26:16.891206", "step": 4974, "epoch": 3 }, { "type": "loss", "content": 0.0003533114795573056, "timestamp": "2025-09-10 02:26:16.904618", "step": 4975, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:26:16.936270", "step": 4975, "epoch": 3 }, { "type": "loss", "content": 0.07041340321302414, "timestamp": "2025-09-10 02:26:16.964307", "step": 4976, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:26:16.995447", "step": 4976, "epoch": 3 }, { "type": "loss", "content": 0.0019405941711738706, "timestamp": "2025-09-10 02:26:17.000357", "step": 4977, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:26:17.034936", "step": 4977, "epoch": 3 }, { "type": "loss", "content": 0.0004297175328247249, "timestamp": "2025-09-10 02:26:17.045363", "step": 4978, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:26:17.077867", "step": 4978, "epoch": 3 }, { "type": "loss", "content": 0.00030306234839372337, "timestamp": "2025-09-10 02:26:17.085791", "step": 4979, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:26:17.119765", "step": 4979, "epoch": 3 }, { "type": "loss", "content": 0.000214372223126702, "timestamp": "2025-09-10 02:26:17.154008", "step": 4980, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:26:17.185421", "step": 4980, "epoch": 3 }, { "type": "loss", "content": 0.0060597313567996025, "timestamp": "2025-09-10 02:26:17.190972", "step": 4981, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 144 ], "flops": 4271696270016 }, "timestamp": "2025-09-10 02:26:17.224555", "step": 4981, "epoch": 3 }, { "type": "loss", "content": 0.0004712548106908798, "timestamp": "2025-09-10 02:26:17.226680", "step": 4982, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:26:17.259673", "step": 4982, "epoch": 3 }, { "type": "loss", "content": 0.018239330500364304, "timestamp": "2025-09-10 02:26:17.266706", "step": 4983, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:26:17.298378", "step": 4983, "epoch": 3 }, { "type": "loss", "content": 5.246271757641807e-05, "timestamp": "2025-09-10 02:26:17.331775", "step": 4984, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:26:17.362536", "step": 4984, "epoch": 3 }, { "type": "loss", "content": 0.000588534923736006, "timestamp": "2025-09-10 02:26:17.367121", "step": 4985, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:26:17.397655", "step": 4985, "epoch": 3 }, { "type": "loss", "content": 0.04317157343029976, "timestamp": "2025-09-10 02:26:17.405334", "step": 4986, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:26:17.436371", "step": 4986, "epoch": 3 }, { "type": "loss", "content": 0.0006422134465537965, "timestamp": "2025-09-10 02:26:17.443245", "step": 4987, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:26:17.474316", "step": 4987, "epoch": 3 }, { "type": "loss", "content": 0.00034632027382031083, "timestamp": "2025-09-10 02:26:17.502573", "step": 4988, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:26:17.535084", "step": 4988, "epoch": 3 }, { "type": "loss", "content": 8.68417409947142e-05, "timestamp": "2025-09-10 02:26:17.539828", "step": 4989, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:26:17.572346", "step": 4989, "epoch": 3 }, { "type": "loss", "content": 0.00013398627925198525, "timestamp": "2025-09-10 02:26:17.576500", "step": 4990, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:26:17.612524", "step": 4990, "epoch": 3 }, { "type": "loss", "content": 0.0029133365023881197, "timestamp": "2025-09-10 02:26:17.626175", "step": 4991, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:26:17.658240", "step": 4991, "epoch": 3 }, { "type": "loss", "content": 8.629496005596593e-05, "timestamp": "2025-09-10 02:26:17.686141", "step": 4992, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:26:17.718276", "step": 4992, "epoch": 3 }, { "type": "loss", "content": 0.0002693708229344338, "timestamp": "2025-09-10 02:26:17.723486", "step": 4993, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:26:17.755369", "step": 4993, "epoch": 3 }, { "type": "loss", "content": 0.00012545159552246332, "timestamp": "2025-09-10 02:26:17.763054", "step": 4994, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:26:17.795274", "step": 4994, "epoch": 3 }, { "type": "loss", "content": 0.0005542716244235635, "timestamp": "2025-09-10 02:26:17.807429", "step": 4995, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:26:17.838080", "step": 4995, "epoch": 3 }, { "type": "loss", "content": 0.0003210590220987797, "timestamp": "2025-09-10 02:26:17.861907", "step": 4996, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:26:17.893230", "step": 4996, "epoch": 3 }, { "type": "loss", "content": 7.695386011619121e-05, "timestamp": "2025-09-10 02:26:17.898024", "step": 4997, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:26:17.928678", "step": 4997, "epoch": 3 }, { "type": "loss", "content": 0.0001586790895089507, "timestamp": "2025-09-10 02:26:17.936042", "step": 4998, "epoch": 3 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 736 ], "batch_size": 8, "flops": 14554433988352 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 480 ], "batch_size": 8, "flops": 9492022189824 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 544 ], "batch_size": 8, "flops": 10757625139456 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 624 ], "batch_size": 8, "flops": 12339628826496 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 576 ], "batch_size": 8, "flops": 11390426614272 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 432 ], "batch_size": 8, "flops": 8542819977600 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 432 ], "batch_size": 8, "flops": 8542819977600 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 128 ], "batch_size": 8, "flops": 2531205966848 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 544 ], "batch_size": 8, "flops": 10757625139456 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 656 ], "batch_size": 8, "flops": 12972430301312 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 128 ], "batch_size": 8, "flops": 2531205966848 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 464 ], "batch_size": 8, "flops": 9175621452416 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 560 ], "batch_size": 8, "flops": 11074025876864 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 576 ], "batch_size": 8, "flops": 11390426614272 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 1168 ], "batch_size": 8, "flops": 23097253898368 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 640 ], "batch_size": 8, "flops": 12656029563904 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 6, 208 ], "batch_size": 8, "flops": 4113209653888 } ], "timestamp": "2025-09-10 02:26:28.250382", "step": 4998, "epoch": 3 }, { "type": "pplx", "content": 20015215.356057025, "timestamp": "2025-09-10 02:26:28.261232", "step": 4998, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:26:28.310477", "step": 4998, "epoch": 3 }, { "type": "loss", "content": 0.0005191663512960076, "timestamp": "2025-09-10 02:26:28.323762", "step": 4999, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:26:28.385226", "step": 4999, "epoch": 3 }, { "type": "loss", "content": 0.0004746699705719948, "timestamp": "2025-09-10 02:26:28.412768", "step": 5000, "epoch": 3 }, { "type": "info", "content": "Checkpoint saved at step 5000", "timestamp": "2025-09-10 02:26:33.179348", "step": 5000, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:26:33.211464", "step": 5000, "epoch": 3 }, { "type": "loss", "content": 0.0004966092528775334, "timestamp": "2025-09-10 02:26:33.215172", "step": 5001, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:26:33.247140", "step": 5001, "epoch": 3 }, { "type": "loss", "content": 0.0005471754702739418, "timestamp": "2025-09-10 02:26:33.253624", "step": 5002, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:26:33.286134", "step": 5002, "epoch": 3 }, { "type": "loss", "content": 0.00014292819832917303, "timestamp": "2025-09-10 02:26:33.295863", "step": 5003, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:26:33.327101", "step": 5003, "epoch": 3 }, { "type": "loss", "content": 0.0004297696577850729, "timestamp": "2025-09-10 02:26:33.354748", "step": 5004, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:26:33.386788", "step": 5004, "epoch": 3 }, { "type": "loss", "content": 8.166915358742699e-05, "timestamp": "2025-09-10 02:26:33.396413", "step": 5005, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:26:33.427713", "step": 5005, "epoch": 3 }, { "type": "loss", "content": 0.00015276219346560538, "timestamp": "2025-09-10 02:26:33.438578", "step": 5006, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:26:33.472065", "step": 5006, "epoch": 3 }, { "type": "loss", "content": 0.0017286234069615602, "timestamp": "2025-09-10 02:26:33.482351", "step": 5007, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:26:33.512627", "step": 5007, "epoch": 3 }, { "type": "loss", "content": 0.00016119235078804195, "timestamp": "2025-09-10 02:26:33.540535", "step": 5008, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:26:33.573284", "step": 5008, "epoch": 3 }, { "type": "loss", "content": 0.0011435570195317268, "timestamp": "2025-09-10 02:26:33.586284", "step": 5009, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-10 02:26:33.621203", "step": 5009, "epoch": 3 }, { "type": "loss", "content": 0.0010838081361725926, "timestamp": "2025-09-10 02:26:33.635195", "step": 5010, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:26:33.665549", "step": 5010, "epoch": 3 }, { "type": "loss", "content": 0.0003136697050649673, "timestamp": "2025-09-10 02:26:33.672471", "step": 5011, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:26:33.703720", "step": 5011, "epoch": 3 }, { "type": "loss", "content": 5.7531153288437054e-05, "timestamp": "2025-09-10 02:26:33.728906", "step": 5012, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-10 02:26:33.765485", "step": 5012, "epoch": 3 }, { "type": "loss", "content": 0.001678946428000927, "timestamp": "2025-09-10 02:26:33.780640", "step": 5013, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:26:33.812596", "step": 5013, "epoch": 3 }, { "type": "loss", "content": 0.0004162538971286267, "timestamp": "2025-09-10 02:26:33.815078", "step": 5014, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:26:33.847600", "step": 5014, "epoch": 3 }, { "type": "loss", "content": 0.0012148728128522635, "timestamp": "2025-09-10 02:26:33.852067", "step": 5015, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:26:33.883868", "step": 5015, "epoch": 3 }, { "type": "loss", "content": 0.019105346873402596, "timestamp": "2025-09-10 02:26:33.915046", "step": 5016, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:26:33.946080", "step": 5016, "epoch": 3 }, { "type": "loss", "content": 0.001212744740769267, "timestamp": "2025-09-10 02:26:33.953972", "step": 5017, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:26:33.986413", "step": 5017, "epoch": 3 }, { "type": "loss", "content": 0.0005570516805164516, "timestamp": "2025-09-10 02:26:33.993998", "step": 5018, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:26:34.024829", "step": 5018, "epoch": 3 }, { "type": "loss", "content": 0.004198791459202766, "timestamp": "2025-09-10 02:26:34.035098", "step": 5019, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:26:34.066423", "step": 5019, "epoch": 3 }, { "type": "loss", "content": 0.0003597374598030001, "timestamp": "2025-09-10 02:26:34.098448", "step": 5020, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:26:34.131175", "step": 5020, "epoch": 3 }, { "type": "loss", "content": 0.01109243929386139, "timestamp": "2025-09-10 02:26:34.141461", "step": 5021, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:26:34.172970", "step": 5021, "epoch": 3 }, { "type": "loss", "content": 0.0014968998730182648, "timestamp": "2025-09-10 02:26:34.180732", "step": 5022, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:26:34.212729", "step": 5022, "epoch": 3 }, { "type": "loss", "content": 0.00012139989848947152, "timestamp": "2025-09-10 02:26:34.219631", "step": 5023, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:26:34.251172", "step": 5023, "epoch": 3 }, { "type": "loss", "content": 0.0002333705051569268, "timestamp": "2025-09-10 02:26:34.277453", "step": 5024, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:26:34.308015", "step": 5024, "epoch": 3 }, { "type": "loss", "content": 0.0001543233374832198, "timestamp": "2025-09-10 02:26:34.312696", "step": 5025, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-10 02:26:34.347950", "step": 5025, "epoch": 3 }, { "type": "loss", "content": 0.0005688256933353841, "timestamp": "2025-09-10 02:26:34.361964", "step": 5026, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:26:34.397329", "step": 5026, "epoch": 3 }, { "type": "loss", "content": 0.00042403684346936643, "timestamp": "2025-09-10 02:26:34.410955", "step": 5027, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:26:34.444474", "step": 5027, "epoch": 3 }, { "type": "loss", "content": 0.0002217363507952541, "timestamp": "2025-09-10 02:26:34.477391", "step": 5028, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:26:34.508716", "step": 5028, "epoch": 3 }, { "type": "loss", "content": 0.0006893317913636565, "timestamp": "2025-09-10 02:26:34.511043", "step": 5029, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:26:34.541016", "step": 5029, "epoch": 3 }, { "type": "loss", "content": 0.008850133046507835, "timestamp": "2025-09-10 02:26:34.548002", "step": 5030, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:26:34.582927", "step": 5030, "epoch": 3 }, { "type": "loss", "content": 0.00032538484083488584, "timestamp": "2025-09-10 02:26:34.590672", "step": 5031, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:26:34.626976", "step": 5031, "epoch": 3 }, { "type": "loss", "content": 0.00039912323700264096, "timestamp": "2025-09-10 02:26:34.658831", "step": 5032, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:26:34.695229", "step": 5032, "epoch": 3 }, { "type": "loss", "content": 0.007538055535405874, "timestamp": "2025-09-10 02:26:34.699593", "step": 5033, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:26:34.730572", "step": 5033, "epoch": 3 }, { "type": "loss", "content": 6.657992344116792e-05, "timestamp": "2025-09-10 02:26:34.740878", "step": 5034, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-10 02:26:34.779662", "step": 5034, "epoch": 3 }, { "type": "loss", "content": 0.0010134560288861394, "timestamp": "2025-09-10 02:26:34.795315", "step": 5035, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:26:34.827775", "step": 5035, "epoch": 3 }, { "type": "loss", "content": 0.0024202538188546896, "timestamp": "2025-09-10 02:26:34.856235", "step": 5036, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:26:34.886920", "step": 5036, "epoch": 3 }, { "type": "loss", "content": 0.0004926332621835172, "timestamp": "2025-09-10 02:26:34.897060", "step": 5037, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:26:34.928166", "step": 5037, "epoch": 3 }, { "type": "loss", "content": 0.0006999452598392963, "timestamp": "2025-09-10 02:26:34.935741", "step": 5038, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:26:34.966631", "step": 5038, "epoch": 3 }, { "type": "loss", "content": 0.005028885323554277, "timestamp": "2025-09-10 02:26:34.974266", "step": 5039, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:26:35.008987", "step": 5039, "epoch": 3 }, { "type": "loss", "content": 0.00027442388818599284, "timestamp": "2025-09-10 02:26:35.034466", "step": 5040, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:26:35.066296", "step": 5040, "epoch": 3 }, { "type": "loss", "content": 0.005980245769023895, "timestamp": "2025-09-10 02:26:35.068853", "step": 5041, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:26:35.099419", "step": 5041, "epoch": 3 }, { "type": "loss", "content": 0.00015993161650840193, "timestamp": "2025-09-10 02:26:35.109597", "step": 5042, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:26:35.142793", "step": 5042, "epoch": 3 }, { "type": "loss", "content": 0.0005007135332562029, "timestamp": "2025-09-10 02:26:35.147451", "step": 5043, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:26:35.180162", "step": 5043, "epoch": 3 }, { "type": "loss", "content": 0.0006699333898723125, "timestamp": "2025-09-10 02:26:35.207921", "step": 5044, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:26:35.240430", "step": 5044, "epoch": 3 }, { "type": "loss", "content": 0.00028629746520891786, "timestamp": "2025-09-10 02:26:35.245081", "step": 5045, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:26:35.277554", "step": 5045, "epoch": 3 }, { "type": "loss", "content": 0.0006818973342888057, "timestamp": "2025-09-10 02:26:35.287372", "step": 5046, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:26:35.318835", "step": 5046, "epoch": 3 }, { "type": "loss", "content": 0.002404263708740473, "timestamp": "2025-09-10 02:26:35.326633", "step": 5047, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:26:35.359340", "step": 5047, "epoch": 3 }, { "type": "loss", "content": 0.02151825651526451, "timestamp": "2025-09-10 02:26:35.387052", "step": 5048, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:26:35.417860", "step": 5048, "epoch": 3 }, { "type": "loss", "content": 0.0003552958951331675, "timestamp": "2025-09-10 02:26:35.422447", "step": 5049, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:26:35.452240", "step": 5049, "epoch": 3 }, { "type": "loss", "content": 0.00025253373314626515, "timestamp": "2025-09-10 02:26:35.459190", "step": 5050, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:26:35.489346", "step": 5050, "epoch": 3 }, { "type": "loss", "content": 0.00040127182728610933, "timestamp": "2025-09-10 02:26:35.493878", "step": 5051, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:26:35.524519", "step": 5051, "epoch": 3 }, { "type": "loss", "content": 0.0006088269292376935, "timestamp": "2025-09-10 02:26:35.549424", "step": 5052, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:26:35.582289", "step": 5052, "epoch": 3 }, { "type": "loss", "content": 0.00042608132935129106, "timestamp": "2025-09-10 02:26:35.585338", "step": 5053, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:26:35.617714", "step": 5053, "epoch": 3 }, { "type": "loss", "content": 0.0003567171806935221, "timestamp": "2025-09-10 02:26:35.621800", "step": 5054, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:26:35.653481", "step": 5054, "epoch": 3 }, { "type": "loss", "content": 0.0005917255766689777, "timestamp": "2025-09-10 02:26:35.663678", "step": 5055, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:26:35.694835", "step": 5055, "epoch": 3 }, { "type": "loss", "content": 0.00010045560338767245, "timestamp": "2025-09-10 02:26:35.718582", "step": 5056, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:26:35.750639", "step": 5056, "epoch": 3 }, { "type": "loss", "content": 0.0001807038497645408, "timestamp": "2025-09-10 02:26:35.755218", "step": 5057, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:26:35.790888", "step": 5057, "epoch": 3 }, { "type": "loss", "content": 0.001338689005933702, "timestamp": "2025-09-10 02:26:35.798429", "step": 5058, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:26:35.833677", "step": 5058, "epoch": 3 }, { "type": "loss", "content": 0.0007186224684119225, "timestamp": "2025-09-10 02:26:35.845938", "step": 5059, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:26:35.876959", "step": 5059, "epoch": 3 }, { "type": "loss", "content": 0.001849995693191886, "timestamp": "2025-09-10 02:26:35.902008", "step": 5060, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:26:35.936006", "step": 5060, "epoch": 3 }, { "type": "loss", "content": 0.00040959817124530673, "timestamp": "2025-09-10 02:26:35.944168", "step": 5061, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:26:35.978195", "step": 5061, "epoch": 3 }, { "type": "loss", "content": 0.0007337800343520939, "timestamp": "2025-09-10 02:26:35.985672", "step": 5062, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:26:36.017177", "step": 5062, "epoch": 3 }, { "type": "loss", "content": 0.004766891244798899, "timestamp": "2025-09-10 02:26:36.024108", "step": 5063, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:26:36.057466", "step": 5063, "epoch": 3 }, { "type": "loss", "content": 0.0009358166716992855, "timestamp": "2025-09-10 02:26:36.085206", "step": 5064, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:26:36.119710", "step": 5064, "epoch": 3 }, { "type": "loss", "content": 0.0004207981692161411, "timestamp": "2025-09-10 02:26:36.128101", "step": 5065, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:26:36.160614", "step": 5065, "epoch": 3 }, { "type": "loss", "content": 0.0005621476448141038, "timestamp": "2025-09-10 02:26:36.167553", "step": 5066, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:26:36.198941", "step": 5066, "epoch": 3 }, { "type": "loss", "content": 0.0005337732727639377, "timestamp": "2025-09-10 02:26:36.205778", "step": 5067, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:26:36.239845", "step": 5067, "epoch": 3 }, { "type": "loss", "content": 0.0006483304314315319, "timestamp": "2025-09-10 02:26:36.268263", "step": 5068, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:26:36.299494", "step": 5068, "epoch": 3 }, { "type": "loss", "content": 0.0004982685786671937, "timestamp": "2025-09-10 02:26:36.308801", "step": 5069, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:26:36.348134", "step": 5069, "epoch": 3 }, { "type": "loss", "content": 0.000861111271660775, "timestamp": "2025-09-10 02:26:36.361512", "step": 5070, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:26:36.393080", "step": 5070, "epoch": 3 }, { "type": "loss", "content": 0.00017864606343209743, "timestamp": "2025-09-10 02:26:36.403739", "step": 5071, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 656 ], "flops": 19459015502528 }, "timestamp": "2025-09-10 02:26:36.460224", "step": 5071, "epoch": 3 }, { "type": "loss", "content": 0.0005434316699393094, "timestamp": "2025-09-10 02:26:36.504467", "step": 5072, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:26:36.537697", "step": 5072, "epoch": 3 }, { "type": "loss", "content": 0.00014476195792667568, "timestamp": "2025-09-10 02:26:36.542072", "step": 5073, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:26:36.576057", "step": 5073, "epoch": 3 }, { "type": "loss", "content": 0.00019232665363233536, "timestamp": "2025-09-10 02:26:36.582602", "step": 5074, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:26:36.616440", "step": 5074, "epoch": 3 }, { "type": "loss", "content": 0.0004715229442808777, "timestamp": "2025-09-10 02:26:36.624236", "step": 5075, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:26:36.657634", "step": 5075, "epoch": 3 }, { "type": "loss", "content": 0.00015088057261891663, "timestamp": "2025-09-10 02:26:36.685386", "step": 5076, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:26:36.718883", "step": 5076, "epoch": 3 }, { "type": "loss", "content": 0.0018138757441192865, "timestamp": "2025-09-10 02:26:36.723325", "step": 5077, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:26:36.756301", "step": 5077, "epoch": 3 }, { "type": "loss", "content": 0.00036036456003785133, "timestamp": "2025-09-10 02:26:36.768752", "step": 5078, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:26:36.802805", "step": 5078, "epoch": 3 }, { "type": "loss", "content": 0.00014213754911907017, "timestamp": "2025-09-10 02:26:36.809991", "step": 5079, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:26:36.842491", "step": 5079, "epoch": 3 }, { "type": "loss", "content": 0.0002783481031656265, "timestamp": "2025-09-10 02:26:36.870966", "step": 5080, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:26:36.905049", "step": 5080, "epoch": 3 }, { "type": "loss", "content": 0.0010689280461519957, "timestamp": "2025-09-10 02:26:36.909941", "step": 5081, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:26:36.943472", "step": 5081, "epoch": 3 }, { "type": "loss", "content": 0.004427754320204258, "timestamp": "2025-09-10 02:26:36.950393", "step": 5082, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:26:36.981893", "step": 5082, "epoch": 3 }, { "type": "loss", "content": 0.0009393549407832325, "timestamp": "2025-09-10 02:26:36.988419", "step": 5083, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:26:37.020810", "step": 5083, "epoch": 3 }, { "type": "loss", "content": 0.021152915433049202, "timestamp": "2025-09-10 02:26:37.048368", "step": 5084, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:26:37.080873", "step": 5084, "epoch": 3 }, { "type": "loss", "content": 0.002822284121066332, "timestamp": "2025-09-10 02:26:37.083125", "step": 5085, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:26:37.116842", "step": 5085, "epoch": 3 }, { "type": "loss", "content": 0.049205031245946884, "timestamp": "2025-09-10 02:26:37.127741", "step": 5086, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:26:37.160653", "step": 5086, "epoch": 3 }, { "type": "loss", "content": 0.00024678107001818717, "timestamp": "2025-09-10 02:26:37.167406", "step": 5087, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:26:37.203699", "step": 5087, "epoch": 3 }, { "type": "loss", "content": 0.005720262881368399, "timestamp": "2025-09-10 02:26:37.231850", "step": 5088, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:26:37.263031", "step": 5088, "epoch": 3 }, { "type": "loss", "content": 0.001497715711593628, "timestamp": "2025-09-10 02:26:37.267326", "step": 5089, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:26:37.302313", "step": 5089, "epoch": 3 }, { "type": "loss", "content": 0.002114651957526803, "timestamp": "2025-09-10 02:26:37.314889", "step": 5090, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 512 ], "flops": 15187581968384 }, "timestamp": "2025-09-10 02:26:37.356579", "step": 5090, "epoch": 3 }, { "type": "loss", "content": 0.011306433007121086, "timestamp": "2025-09-10 02:26:37.374278", "step": 5091, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:26:37.404869", "step": 5091, "epoch": 3 }, { "type": "loss", "content": 0.004676491022109985, "timestamp": "2025-09-10 02:26:37.433034", "step": 5092, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:26:37.462511", "step": 5092, "epoch": 3 }, { "type": "loss", "content": 0.0011396221816539764, "timestamp": "2025-09-10 02:26:37.467061", "step": 5093, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:26:37.496773", "step": 5093, "epoch": 3 }, { "type": "loss", "content": 0.000759919814299792, "timestamp": "2025-09-10 02:26:37.503790", "step": 5094, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:26:37.535802", "step": 5094, "epoch": 3 }, { "type": "loss", "content": 0.00020001002121716738, "timestamp": "2025-09-10 02:26:37.542443", "step": 5095, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:26:37.572089", "step": 5095, "epoch": 3 }, { "type": "loss", "content": 0.00017582971486262977, "timestamp": "2025-09-10 02:26:37.599644", "step": 5096, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:26:37.630671", "step": 5096, "epoch": 3 }, { "type": "loss", "content": 0.0004590843745972961, "timestamp": "2025-09-10 02:26:37.635779", "step": 5097, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:26:37.676345", "step": 5097, "epoch": 3 }, { "type": "loss", "content": 0.0002961141581181437, "timestamp": "2025-09-10 02:26:37.686453", "step": 5098, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:26:37.726004", "step": 5098, "epoch": 3 }, { "type": "loss", "content": 0.00022437986626755446, "timestamp": "2025-09-10 02:26:37.739344", "step": 5099, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:26:37.773356", "step": 5099, "epoch": 3 }, { "type": "loss", "content": 0.0002960095298476517, "timestamp": "2025-09-10 02:26:37.800799", "step": 5100, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:26:37.831592", "step": 5100, "epoch": 3 }, { "type": "loss", "content": 0.00015804110444150865, "timestamp": "2025-09-10 02:26:37.836206", "step": 5101, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:26:37.871853", "step": 5101, "epoch": 3 }, { "type": "loss", "content": 0.0004349082300905138, "timestamp": "2025-09-10 02:26:37.885536", "step": 5102, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:26:37.915557", "step": 5102, "epoch": 3 }, { "type": "loss", "content": 0.0009989996906369925, "timestamp": "2025-09-10 02:26:37.922804", "step": 5103, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:26:37.956305", "step": 5103, "epoch": 3 }, { "type": "loss", "content": 0.0002965817984659225, "timestamp": "2025-09-10 02:26:37.981392", "step": 5104, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:26:38.019315", "step": 5104, "epoch": 3 }, { "type": "loss", "content": 0.00011326335516059771, "timestamp": "2025-09-10 02:26:38.027976", "step": 5105, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:26:38.059644", "step": 5105, "epoch": 3 }, { "type": "loss", "content": 0.0006430582143366337, "timestamp": "2025-09-10 02:26:38.067309", "step": 5106, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:26:38.103324", "step": 5106, "epoch": 3 }, { "type": "loss", "content": 0.03719168156385422, "timestamp": "2025-09-10 02:26:38.113064", "step": 5107, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:26:38.150802", "step": 5107, "epoch": 3 }, { "type": "loss", "content": 0.0005424571572802961, "timestamp": "2025-09-10 02:26:38.181667", "step": 5108, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:26:38.214866", "step": 5108, "epoch": 3 }, { "type": "loss", "content": 0.00010797369759529829, "timestamp": "2025-09-10 02:26:38.219882", "step": 5109, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:26:38.251030", "step": 5109, "epoch": 3 }, { "type": "loss", "content": 8.883118425728753e-05, "timestamp": "2025-09-10 02:26:38.263566", "step": 5110, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:26:38.294698", "step": 5110, "epoch": 3 }, { "type": "loss", "content": 5.636332571157254e-05, "timestamp": "2025-09-10 02:26:38.301741", "step": 5111, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:26:38.333874", "step": 5111, "epoch": 3 }, { "type": "loss", "content": 0.00013791404489893466, "timestamp": "2025-09-10 02:26:38.362134", "step": 5112, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:26:38.402640", "step": 5112, "epoch": 3 }, { "type": "loss", "content": 0.0002762853109743446, "timestamp": "2025-09-10 02:26:38.407777", "step": 5113, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:26:38.449345", "step": 5113, "epoch": 3 }, { "type": "loss", "content": 0.00036431459011510015, "timestamp": "2025-09-10 02:26:38.453859", "step": 5114, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:26:38.484171", "step": 5114, "epoch": 3 }, { "type": "loss", "content": 0.002119356067851186, "timestamp": "2025-09-10 02:26:38.490985", "step": 5115, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:26:38.537020", "step": 5115, "epoch": 3 }, { "type": "loss", "content": 0.0015236083418130875, "timestamp": "2025-09-10 02:26:38.568091", "step": 5116, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:26:38.600258", "step": 5116, "epoch": 3 }, { "type": "loss", "content": 0.0002494109212420881, "timestamp": "2025-09-10 02:26:38.610000", "step": 5117, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:26:38.642033", "step": 5117, "epoch": 3 }, { "type": "loss", "content": 0.00010412498522782698, "timestamp": "2025-09-10 02:26:38.646738", "step": 5118, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:26:38.678539", "step": 5118, "epoch": 3 }, { "type": "loss", "content": 7.976993947522715e-05, "timestamp": "2025-09-10 02:26:38.685400", "step": 5119, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:26:38.718264", "step": 5119, "epoch": 3 }, { "type": "loss", "content": 0.002568106632679701, "timestamp": "2025-09-10 02:26:38.745928", "step": 5120, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:26:38.780243", "step": 5120, "epoch": 3 }, { "type": "loss", "content": 0.0005041944095864892, "timestamp": "2025-09-10 02:26:38.785032", "step": 5121, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:26:38.817838", "step": 5121, "epoch": 3 }, { "type": "loss", "content": 0.00011331056157359853, "timestamp": "2025-09-10 02:26:38.829518", "step": 5122, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:26:38.862267", "step": 5122, "epoch": 3 }, { "type": "loss", "content": 0.00043031698442064226, "timestamp": "2025-09-10 02:26:38.874856", "step": 5123, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:26:38.908541", "step": 5123, "epoch": 3 }, { "type": "loss", "content": 0.002998805372044444, "timestamp": "2025-09-10 02:26:38.936629", "step": 5124, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:26:38.968508", "step": 5124, "epoch": 3 }, { "type": "loss", "content": 0.01266338862478733, "timestamp": "2025-09-10 02:26:38.973088", "step": 5125, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:26:39.003978", "step": 5125, "epoch": 3 }, { "type": "loss", "content": 0.0002824350376613438, "timestamp": "2025-09-10 02:26:39.010652", "step": 5126, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:26:39.042165", "step": 5126, "epoch": 3 }, { "type": "loss", "content": 0.00010247425962006673, "timestamp": "2025-09-10 02:26:39.046697", "step": 5127, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:26:39.081094", "step": 5127, "epoch": 3 }, { "type": "loss", "content": 0.0008860399248078465, "timestamp": "2025-09-10 02:26:39.108780", "step": 5128, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:26:39.143111", "step": 5128, "epoch": 3 }, { "type": "loss", "content": 8.454523049294949e-05, "timestamp": "2025-09-10 02:26:39.147281", "step": 5129, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:26:39.187542", "step": 5129, "epoch": 3 }, { "type": "loss", "content": 0.0003048314538318664, "timestamp": "2025-09-10 02:26:39.197351", "step": 5130, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:26:39.230939", "step": 5130, "epoch": 3 }, { "type": "loss", "content": 0.00018424120207782835, "timestamp": "2025-09-10 02:26:39.240890", "step": 5131, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:26:39.272716", "step": 5131, "epoch": 3 }, { "type": "loss", "content": 0.0006012596422806382, "timestamp": "2025-09-10 02:26:39.300808", "step": 5132, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:26:39.332691", "step": 5132, "epoch": 3 }, { "type": "loss", "content": 0.0003009784559253603, "timestamp": "2025-09-10 02:26:39.339735", "step": 5133, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:26:39.375609", "step": 5133, "epoch": 3 }, { "type": "loss", "content": 0.03187219798564911, "timestamp": "2025-09-10 02:26:39.385234", "step": 5134, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:26:39.423679", "step": 5134, "epoch": 3 }, { "type": "loss", "content": 0.0010775693226605654, "timestamp": "2025-09-10 02:26:39.434241", "step": 5135, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:26:39.464830", "step": 5135, "epoch": 3 }, { "type": "loss", "content": 0.001194292795844376, "timestamp": "2025-09-10 02:26:39.496349", "step": 5136, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:26:39.527484", "step": 5136, "epoch": 3 }, { "type": "loss", "content": 0.0019652375485748053, "timestamp": "2025-09-10 02:26:39.535242", "step": 5137, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:26:39.568984", "step": 5137, "epoch": 3 }, { "type": "loss", "content": 0.010311486199498177, "timestamp": "2025-09-10 02:26:39.576682", "step": 5138, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:26:39.606200", "step": 5138, "epoch": 3 }, { "type": "loss", "content": 0.00019729572522919625, "timestamp": "2025-09-10 02:26:39.613264", "step": 5139, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:26:39.643471", "step": 5139, "epoch": 3 }, { "type": "loss", "content": 0.00033027934841811657, "timestamp": "2025-09-10 02:26:39.671867", "step": 5140, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:26:39.703487", "step": 5140, "epoch": 3 }, { "type": "loss", "content": 0.0022352919913828373, "timestamp": "2025-09-10 02:26:39.705615", "step": 5141, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:26:39.737702", "step": 5141, "epoch": 3 }, { "type": "loss", "content": 0.002084512962028384, "timestamp": "2025-09-10 02:26:39.746825", "step": 5142, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:26:39.778404", "step": 5142, "epoch": 3 }, { "type": "loss", "content": 0.00020568576292134821, "timestamp": "2025-09-10 02:26:39.789245", "step": 5143, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:26:39.818748", "step": 5143, "epoch": 3 }, { "type": "loss", "content": 0.00034350433270446956, "timestamp": "2025-09-10 02:26:39.849832", "step": 5144, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:26:39.882711", "step": 5144, "epoch": 3 }, { "type": "loss", "content": 9.809032781049609e-05, "timestamp": "2025-09-10 02:26:39.892207", "step": 5145, "epoch": 3 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 736 ], "batch_size": 8, "flops": 14554433988352 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 480 ], "batch_size": 8, "flops": 9492022189824 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 544 ], "batch_size": 8, "flops": 10757625139456 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 624 ], "batch_size": 8, "flops": 12339628826496 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 576 ], "batch_size": 8, "flops": 11390426614272 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 432 ], "batch_size": 8, "flops": 8542819977600 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 432 ], "batch_size": 8, "flops": 8542819977600 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 128 ], "batch_size": 8, "flops": 2531205966848 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 544 ], "batch_size": 8, "flops": 10757625139456 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 656 ], "batch_size": 8, "flops": 12972430301312 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 128 ], "batch_size": 8, "flops": 2531205966848 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 464 ], "batch_size": 8, "flops": 9175621452416 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 560 ], "batch_size": 8, "flops": 11074025876864 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 576 ], "batch_size": 8, "flops": 11390426614272 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 1168 ], "batch_size": 8, "flops": 23097253898368 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 640 ], "batch_size": 8, "flops": 12656029563904 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 6, 208 ], "batch_size": 8, "flops": 4113209653888 } ], "timestamp": "2025-09-10 02:26:50.094432", "step": 5145, "epoch": 3 }, { "type": "pplx", "content": 19013441.374623075, "timestamp": "2025-09-10 02:26:50.098424", "step": 5145, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 128 ], "flops": 3797092544000 }, "timestamp": "2025-09-10 02:26:50.128744", "step": 5145, "epoch": 3 }, { "type": "loss", "content": 0.0014527181629091501, "timestamp": "2025-09-10 02:26:50.130787", "step": 5146, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:26:50.162665", "step": 5146, "epoch": 3 }, { "type": "loss", "content": 0.027358056977391243, "timestamp": "2025-09-10 02:26:50.168867", "step": 5147, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:26:50.202488", "step": 5147, "epoch": 3 }, { "type": "loss", "content": 0.0004902129294350743, "timestamp": "2025-09-10 02:26:50.229570", "step": 5148, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:26:50.265151", "step": 5148, "epoch": 3 }, { "type": "loss", "content": 0.00014729471877217293, "timestamp": "2025-09-10 02:26:50.269830", "step": 5149, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:26:50.302158", "step": 5149, "epoch": 3 }, { "type": "loss", "content": 0.003401133930310607, "timestamp": "2025-09-10 02:26:50.309049", "step": 5150, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:26:50.340073", "step": 5150, "epoch": 3 }, { "type": "loss", "content": 0.00013149350706953555, "timestamp": "2025-09-10 02:26:50.351481", "step": 5151, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:26:50.385466", "step": 5151, "epoch": 3 }, { "type": "loss", "content": 0.01326068490743637, "timestamp": "2025-09-10 02:26:50.419683", "step": 5152, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:26:50.451962", "step": 5152, "epoch": 3 }, { "type": "loss", "content": 0.00036567659117281437, "timestamp": "2025-09-10 02:26:50.456231", "step": 5153, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:26:50.488390", "step": 5153, "epoch": 3 }, { "type": "loss", "content": 0.013120094314217567, "timestamp": "2025-09-10 02:26:50.495873", "step": 5154, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:26:50.529639", "step": 5154, "epoch": 3 }, { "type": "loss", "content": 0.00017451155872549862, "timestamp": "2025-09-10 02:26:50.536633", "step": 5155, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:26:50.567172", "step": 5155, "epoch": 3 }, { "type": "loss", "content": 0.00037706297007389367, "timestamp": "2025-09-10 02:26:50.592290", "step": 5156, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:26:50.624924", "step": 5156, "epoch": 3 }, { "type": "loss", "content": 0.0002998369454871863, "timestamp": "2025-09-10 02:26:50.628990", "step": 5157, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:26:50.661318", "step": 5157, "epoch": 3 }, { "type": "loss", "content": 0.0006355083896778524, "timestamp": "2025-09-10 02:26:50.665583", "step": 5158, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-10 02:26:50.700149", "step": 5158, "epoch": 3 }, { "type": "loss", "content": 0.0024739918299019337, "timestamp": "2025-09-10 02:26:50.714091", "step": 5159, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:26:50.747276", "step": 5159, "epoch": 3 }, { "type": "loss", "content": 0.0006842431612312794, "timestamp": "2025-09-10 02:26:50.772316", "step": 5160, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:26:50.804366", "step": 5160, "epoch": 3 }, { "type": "loss", "content": 0.0001715581602184102, "timestamp": "2025-09-10 02:26:50.806614", "step": 5161, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:26:50.838444", "step": 5161, "epoch": 3 }, { "type": "loss", "content": 0.0001334312546532601, "timestamp": "2025-09-10 02:26:50.850899", "step": 5162, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:26:50.882059", "step": 5162, "epoch": 3 }, { "type": "loss", "content": 0.0015925957122817636, "timestamp": "2025-09-10 02:26:50.892413", "step": 5163, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:26:50.924795", "step": 5163, "epoch": 3 }, { "type": "loss", "content": 0.035536449402570724, "timestamp": "2025-09-10 02:26:50.952247", "step": 5164, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:26:50.984434", "step": 5164, "epoch": 3 }, { "type": "loss", "content": 0.032359056174755096, "timestamp": "2025-09-10 02:26:50.988981", "step": 5165, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:26:51.019851", "step": 5165, "epoch": 3 }, { "type": "loss", "content": 0.0012751846807077527, "timestamp": "2025-09-10 02:26:51.024166", "step": 5166, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:26:51.057009", "step": 5166, "epoch": 3 }, { "type": "loss", "content": 0.0008924083085730672, "timestamp": "2025-09-10 02:26:51.061492", "step": 5167, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:26:51.094260", "step": 5167, "epoch": 3 }, { "type": "loss", "content": 0.0009923691395670176, "timestamp": "2025-09-10 02:26:51.127680", "step": 5168, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:26:51.159718", "step": 5168, "epoch": 3 }, { "type": "loss", "content": 0.004293483681976795, "timestamp": "2025-09-10 02:26:51.167509", "step": 5169, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:26:51.198390", "step": 5169, "epoch": 3 }, { "type": "loss", "content": 0.0001559741358505562, "timestamp": "2025-09-10 02:26:51.202516", "step": 5170, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:26:51.234801", "step": 5170, "epoch": 3 }, { "type": "loss", "content": 0.0003882385208271444, "timestamp": "2025-09-10 02:26:51.242426", "step": 5171, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:26:51.274436", "step": 5171, "epoch": 3 }, { "type": "loss", "content": 0.0005971429636701941, "timestamp": "2025-09-10 02:26:51.299776", "step": 5172, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:26:51.330997", "step": 5172, "epoch": 3 }, { "type": "loss", "content": 0.00021882994042243809, "timestamp": "2025-09-10 02:26:51.336487", "step": 5173, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:26:51.367881", "step": 5173, "epoch": 3 }, { "type": "loss", "content": 0.0011246565263718367, "timestamp": "2025-09-10 02:26:51.375480", "step": 5174, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:26:51.405682", "step": 5174, "epoch": 3 }, { "type": "loss", "content": 0.00022782094310969114, "timestamp": "2025-09-10 02:26:51.410086", "step": 5175, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:26:51.442061", "step": 5175, "epoch": 3 }, { "type": "loss", "content": 0.00039846167783252895, "timestamp": "2025-09-10 02:26:51.467694", "step": 5176, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:26:51.501857", "step": 5176, "epoch": 3 }, { "type": "loss", "content": 0.00024708619457669556, "timestamp": "2025-09-10 02:26:51.514555", "step": 5177, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:26:51.546565", "step": 5177, "epoch": 3 }, { "type": "loss", "content": 0.05931756645441055, "timestamp": "2025-09-10 02:26:51.558679", "step": 5178, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:26:51.590202", "step": 5178, "epoch": 3 }, { "type": "loss", "content": 0.0005889888852834702, "timestamp": "2025-09-10 02:26:51.594559", "step": 5179, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:26:51.625517", "step": 5179, "epoch": 3 }, { "type": "loss", "content": 0.00023044981935527176, "timestamp": "2025-09-10 02:26:51.653377", "step": 5180, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:26:51.684595", "step": 5180, "epoch": 3 }, { "type": "loss", "content": 0.0009921352611854672, "timestamp": "2025-09-10 02:26:51.689662", "step": 5181, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:26:51.720180", "step": 5181, "epoch": 3 }, { "type": "loss", "content": 0.0005538056720979512, "timestamp": "2025-09-10 02:26:51.731255", "step": 5182, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:26:51.762227", "step": 5182, "epoch": 3 }, { "type": "loss", "content": 0.0023367933463305235, "timestamp": "2025-09-10 02:26:51.772542", "step": 5183, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:26:51.804192", "step": 5183, "epoch": 3 }, { "type": "loss", "content": 0.0002975693787448108, "timestamp": "2025-09-10 02:26:51.832091", "step": 5184, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:26:51.864290", "step": 5184, "epoch": 3 }, { "type": "loss", "content": 0.0006394670926965773, "timestamp": "2025-09-10 02:26:51.871192", "step": 5185, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 144 ], "flops": 4271696270016 }, "timestamp": "2025-09-10 02:26:51.902617", "step": 5185, "epoch": 3 }, { "type": "loss", "content": 0.0005975649692118168, "timestamp": "2025-09-10 02:26:51.904913", "step": 5186, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:26:51.937164", "step": 5186, "epoch": 3 }, { "type": "loss", "content": 0.0004425121296662837, "timestamp": "2025-09-10 02:26:51.943970", "step": 5187, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:26:51.975672", "step": 5187, "epoch": 3 }, { "type": "loss", "content": 0.00035937223583459854, "timestamp": "2025-09-10 02:26:52.003857", "step": 5188, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:26:52.035467", "step": 5188, "epoch": 3 }, { "type": "loss", "content": 0.0011359489290043712, "timestamp": "2025-09-10 02:26:52.037712", "step": 5189, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:26:52.068998", "step": 5189, "epoch": 3 }, { "type": "loss", "content": 0.003917438443750143, "timestamp": "2025-09-10 02:26:52.075712", "step": 5190, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:26:52.107588", "step": 5190, "epoch": 3 }, { "type": "loss", "content": 0.0001755694829626009, "timestamp": "2025-09-10 02:26:52.114296", "step": 5191, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:26:52.146621", "step": 5191, "epoch": 3 }, { "type": "loss", "content": 0.0007622348493896425, "timestamp": "2025-09-10 02:26:52.174521", "step": 5192, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:26:52.206264", "step": 5192, "epoch": 3 }, { "type": "loss", "content": 0.00104383728466928, "timestamp": "2025-09-10 02:26:52.213874", "step": 5193, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:26:52.246717", "step": 5193, "epoch": 3 }, { "type": "loss", "content": 0.00010719360579969361, "timestamp": "2025-09-10 02:26:52.250995", "step": 5194, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:26:52.282112", "step": 5194, "epoch": 3 }, { "type": "loss", "content": 0.00022146674746181816, "timestamp": "2025-09-10 02:26:52.289447", "step": 5195, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:26:52.321360", "step": 5195, "epoch": 3 }, { "type": "loss", "content": 0.000337046105414629, "timestamp": "2025-09-10 02:26:52.349962", "step": 5196, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:26:52.381024", "step": 5196, "epoch": 3 }, { "type": "loss", "content": 0.0005232029943726957, "timestamp": "2025-09-10 02:26:52.385715", "step": 5197, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:26:52.416162", "step": 5197, "epoch": 3 }, { "type": "loss", "content": 0.00024187321832869202, "timestamp": "2025-09-10 02:26:52.426355", "step": 5198, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:26:52.457633", "step": 5198, "epoch": 3 }, { "type": "loss", "content": 0.026408951729536057, "timestamp": "2025-09-10 02:26:52.462089", "step": 5199, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-10 02:26:52.500650", "step": 5199, "epoch": 3 }, { "type": "loss", "content": 0.00909572746604681, "timestamp": "2025-09-10 02:26:52.537239", "step": 5200, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:26:52.568974", "step": 5200, "epoch": 3 }, { "type": "loss", "content": 0.00032603865838609636, "timestamp": "2025-09-10 02:26:52.573368", "step": 5201, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:26:52.603891", "step": 5201, "epoch": 3 }, { "type": "loss", "content": 0.005753154866397381, "timestamp": "2025-09-10 02:26:52.610681", "step": 5202, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:26:52.641795", "step": 5202, "epoch": 3 }, { "type": "loss", "content": 0.0010426250519230962, "timestamp": "2025-09-10 02:26:52.649517", "step": 5203, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 432 ], "flops": 12814563338304 }, "timestamp": "2025-09-10 02:26:52.689718", "step": 5203, "epoch": 3 }, { "type": "loss", "content": 0.0064912172965705395, "timestamp": "2025-09-10 02:26:52.726772", "step": 5204, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:26:52.759662", "step": 5204, "epoch": 3 }, { "type": "loss", "content": 0.00025196291971951723, "timestamp": "2025-09-10 02:26:52.761919", "step": 5205, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:26:52.796838", "step": 5205, "epoch": 3 }, { "type": "loss", "content": 0.0011967868776991963, "timestamp": "2025-09-10 02:26:52.810487", "step": 5206, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:26:52.842346", "step": 5206, "epoch": 3 }, { "type": "loss", "content": 0.0008071462507359684, "timestamp": "2025-09-10 02:26:52.849531", "step": 5207, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:26:52.881484", "step": 5207, "epoch": 3 }, { "type": "loss", "content": 0.00036838999949395657, "timestamp": "2025-09-10 02:26:52.908146", "step": 5208, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:26:52.947984", "step": 5208, "epoch": 3 }, { "type": "loss", "content": 0.0002956208190880716, "timestamp": "2025-09-10 02:26:52.952213", "step": 5209, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:26:52.984034", "step": 5209, "epoch": 3 }, { "type": "loss", "content": 0.003162443172186613, "timestamp": "2025-09-10 02:26:52.986347", "step": 5210, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 144 ], "flops": 4271696270016 }, "timestamp": "2025-09-10 02:26:53.016915", "step": 5210, "epoch": 3 }, { "type": "loss", "content": 0.0004851007543038577, "timestamp": "2025-09-10 02:26:53.019203", "step": 5211, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-10 02:26:53.057262", "step": 5211, "epoch": 3 }, { "type": "loss", "content": 0.0001697681873338297, "timestamp": "2025-09-10 02:26:53.094094", "step": 5212, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 128 ], "flops": 3797092544000 }, "timestamp": "2025-09-10 02:26:53.124931", "step": 5212, "epoch": 3 }, { "type": "loss", "content": 0.0003544171922840178, "timestamp": "2025-09-10 02:26:53.126906", "step": 5213, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:26:53.158968", "step": 5213, "epoch": 3 }, { "type": "loss", "content": 0.007984244264662266, "timestamp": "2025-09-10 02:26:53.163284", "step": 5214, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:26:53.194990", "step": 5214, "epoch": 3 }, { "type": "loss", "content": 0.0009763463167473674, "timestamp": "2025-09-10 02:26:53.202091", "step": 5215, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:26:53.233627", "step": 5215, "epoch": 3 }, { "type": "loss", "content": 8.645112393423915e-05, "timestamp": "2025-09-10 02:26:53.264071", "step": 5216, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:26:53.296248", "step": 5216, "epoch": 3 }, { "type": "loss", "content": 0.00014072064368519932, "timestamp": "2025-09-10 02:26:53.300402", "step": 5217, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:26:53.332383", "step": 5217, "epoch": 3 }, { "type": "loss", "content": 0.012747065164148808, "timestamp": "2025-09-10 02:26:53.341941", "step": 5218, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-10 02:26:53.375345", "step": 5218, "epoch": 3 }, { "type": "loss", "content": 0.00011503745190566406, "timestamp": "2025-09-10 02:26:53.389105", "step": 5219, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:26:53.422086", "step": 5219, "epoch": 3 }, { "type": "loss", "content": 0.00017217174172401428, "timestamp": "2025-09-10 02:26:53.450612", "step": 5220, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:26:53.482505", "step": 5220, "epoch": 3 }, { "type": "loss", "content": 0.0002020140818785876, "timestamp": "2025-09-10 02:26:53.487460", "step": 5221, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:26:53.520956", "step": 5221, "epoch": 3 }, { "type": "loss", "content": 0.000206151555175893, "timestamp": "2025-09-10 02:26:53.528561", "step": 5222, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:26:53.560359", "step": 5222, "epoch": 3 }, { "type": "loss", "content": 0.0019689316395670176, "timestamp": "2025-09-10 02:26:53.571892", "step": 5223, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:26:53.602541", "step": 5223, "epoch": 3 }, { "type": "loss", "content": 0.0025810713414102793, "timestamp": "2025-09-10 02:26:53.635699", "step": 5224, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-10 02:26:53.671170", "step": 5224, "epoch": 3 }, { "type": "loss", "content": 0.005631653126329184, "timestamp": "2025-09-10 02:26:53.684263", "step": 5225, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:26:53.716685", "step": 5225, "epoch": 3 }, { "type": "loss", "content": 0.0002063760912278667, "timestamp": "2025-09-10 02:26:53.725994", "step": 5226, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:26:53.757179", "step": 5226, "epoch": 3 }, { "type": "loss", "content": 0.010216274298727512, "timestamp": "2025-09-10 02:26:53.764700", "step": 5227, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-10 02:26:53.802766", "step": 5227, "epoch": 3 }, { "type": "loss", "content": 0.001996259670704603, "timestamp": "2025-09-10 02:26:53.839561", "step": 5228, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:26:53.869898", "step": 5228, "epoch": 3 }, { "type": "loss", "content": 0.0011374764144420624, "timestamp": "2025-09-10 02:26:53.874083", "step": 5229, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:26:53.908877", "step": 5229, "epoch": 3 }, { "type": "loss", "content": 0.0006331949844025075, "timestamp": "2025-09-10 02:26:53.922647", "step": 5230, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:26:53.955933", "step": 5230, "epoch": 3 }, { "type": "loss", "content": 0.0007114322506822646, "timestamp": "2025-09-10 02:26:53.969325", "step": 5231, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:26:54.001297", "step": 5231, "epoch": 3 }, { "type": "loss", "content": 0.004758648574352264, "timestamp": "2025-09-10 02:26:54.026232", "step": 5232, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:26:54.058661", "step": 5232, "epoch": 3 }, { "type": "loss", "content": 0.00021887525508645922, "timestamp": "2025-09-10 02:26:54.066298", "step": 5233, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:26:54.097606", "step": 5233, "epoch": 3 }, { "type": "loss", "content": 0.0062928879633545876, "timestamp": "2025-09-10 02:26:54.101720", "step": 5234, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:26:54.132929", "step": 5234, "epoch": 3 }, { "type": "loss", "content": 0.0007375451386906207, "timestamp": "2025-09-10 02:26:54.140376", "step": 5235, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:26:54.173713", "step": 5235, "epoch": 3 }, { "type": "loss", "content": 0.00021762121468782425, "timestamp": "2025-09-10 02:26:54.198807", "step": 5236, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:26:54.231943", "step": 5236, "epoch": 3 }, { "type": "loss", "content": 0.0011317295720800757, "timestamp": "2025-09-10 02:26:54.241667", "step": 5237, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:26:54.275801", "step": 5237, "epoch": 3 }, { "type": "loss", "content": 0.00016049954865593463, "timestamp": "2025-09-10 02:26:54.283440", "step": 5238, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:26:54.313598", "step": 5238, "epoch": 3 }, { "type": "loss", "content": 0.01704251952469349, "timestamp": "2025-09-10 02:26:54.316005", "step": 5239, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:26:54.348099", "step": 5239, "epoch": 3 }, { "type": "loss", "content": 0.001351992366835475, "timestamp": "2025-09-10 02:26:54.376305", "step": 5240, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:26:54.407460", "step": 5240, "epoch": 3 }, { "type": "loss", "content": 0.0004043028748128563, "timestamp": "2025-09-10 02:26:54.411662", "step": 5241, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:26:54.442179", "step": 5241, "epoch": 3 }, { "type": "loss", "content": 0.005190826021134853, "timestamp": "2025-09-10 02:26:54.451994", "step": 5242, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:26:54.483998", "step": 5242, "epoch": 3 }, { "type": "loss", "content": 0.0003823291917797178, "timestamp": "2025-09-10 02:26:54.491018", "step": 5243, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:26:54.521728", "step": 5243, "epoch": 3 }, { "type": "loss", "content": 0.0018407927127555013, "timestamp": "2025-09-10 02:26:54.550143", "step": 5244, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:26:54.583304", "step": 5244, "epoch": 3 }, { "type": "loss", "content": 5.925807636231184e-05, "timestamp": "2025-09-10 02:26:54.592363", "step": 5245, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:26:54.623476", "step": 5245, "epoch": 3 }, { "type": "loss", "content": 0.002199393231421709, "timestamp": "2025-09-10 02:26:54.630980", "step": 5246, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 480 ], "flops": 14238374516352 }, "timestamp": "2025-09-10 02:26:54.672924", "step": 5246, "epoch": 3 }, { "type": "loss", "content": 0.00037568985135294497, "timestamp": "2025-09-10 02:26:54.690237", "step": 5247, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:26:54.721466", "step": 5247, "epoch": 3 }, { "type": "loss", "content": 0.0037835666444152594, "timestamp": "2025-09-10 02:26:54.749018", "step": 5248, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:26:54.782708", "step": 5248, "epoch": 3 }, { "type": "loss", "content": 0.00022347843332681805, "timestamp": "2025-09-10 02:26:54.785142", "step": 5249, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 688 ], "flops": 20408222954560 }, "timestamp": "2025-09-10 02:26:54.841596", "step": 5249, "epoch": 3 }, { "type": "loss", "content": 7.557055505458266e-05, "timestamp": "2025-09-10 02:26:54.865716", "step": 5250, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:26:54.898591", "step": 5250, "epoch": 3 }, { "type": "loss", "content": 0.002831138903275132, "timestamp": "2025-09-10 02:26:54.900995", "step": 5251, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:26:54.932463", "step": 5251, "epoch": 3 }, { "type": "loss", "content": 0.0007521641673520207, "timestamp": "2025-09-10 02:26:54.960783", "step": 5252, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:26:54.991912", "step": 5252, "epoch": 3 }, { "type": "loss", "content": 9.602372301742435e-05, "timestamp": "2025-09-10 02:26:54.996710", "step": 5253, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:26:55.027852", "step": 5253, "epoch": 3 }, { "type": "loss", "content": 0.014525480568408966, "timestamp": "2025-09-10 02:26:55.030309", "step": 5254, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:26:55.061017", "step": 5254, "epoch": 3 }, { "type": "loss", "content": 8.630308730062097e-05, "timestamp": "2025-09-10 02:26:55.072516", "step": 5255, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:26:55.104329", "step": 5255, "epoch": 3 }, { "type": "loss", "content": 0.00013203138951212168, "timestamp": "2025-09-10 02:26:55.132507", "step": 5256, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:26:55.164602", "step": 5256, "epoch": 3 }, { "type": "loss", "content": 0.0021953012328594923, "timestamp": "2025-09-10 02:26:55.169052", "step": 5257, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-10 02:26:55.206701", "step": 5257, "epoch": 3 }, { "type": "loss", "content": 0.017087912186980247, "timestamp": "2025-09-10 02:26:55.222355", "step": 5258, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:26:55.253490", "step": 5258, "epoch": 3 }, { "type": "loss", "content": 0.00040845529292710125, "timestamp": "2025-09-10 02:26:55.260749", "step": 5259, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 560 ], "flops": 16611393146432 }, "timestamp": "2025-09-10 02:26:55.308329", "step": 5259, "epoch": 3 }, { "type": "loss", "content": 9.842081635724753e-05, "timestamp": "2025-09-10 02:26:55.348588", "step": 5260, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:26:55.379379", "step": 5260, "epoch": 3 }, { "type": "loss", "content": 0.0008445015409961343, "timestamp": "2025-09-10 02:26:55.383680", "step": 5261, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:26:55.415441", "step": 5261, "epoch": 3 }, { "type": "loss", "content": 0.00015750537568237633, "timestamp": "2025-09-10 02:26:55.424919", "step": 5262, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:26:55.456799", "step": 5262, "epoch": 3 }, { "type": "loss", "content": 0.0009556380682624876, "timestamp": "2025-09-10 02:26:55.463257", "step": 5263, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:26:55.500776", "step": 5263, "epoch": 3 }, { "type": "loss", "content": 0.00016424224304500967, "timestamp": "2025-09-10 02:26:55.525896", "step": 5264, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:26:55.557462", "step": 5264, "epoch": 3 }, { "type": "loss", "content": 0.00011993583757430315, "timestamp": "2025-09-10 02:26:55.567142", "step": 5265, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:26:55.598056", "step": 5265, "epoch": 3 }, { "type": "loss", "content": 0.00017937307711690664, "timestamp": "2025-09-10 02:26:55.609593", "step": 5266, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:26:55.641151", "step": 5266, "epoch": 3 }, { "type": "loss", "content": 0.0018583576893433928, "timestamp": "2025-09-10 02:26:55.647912", "step": 5267, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:26:55.680586", "step": 5267, "epoch": 3 }, { "type": "loss", "content": 0.002548788907006383, "timestamp": "2025-09-10 02:26:55.713197", "step": 5268, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:26:55.744559", "step": 5268, "epoch": 3 }, { "type": "loss", "content": 0.00015272719610948116, "timestamp": "2025-09-10 02:26:55.748061", "step": 5269, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:26:55.780263", "step": 5269, "epoch": 3 }, { "type": "loss", "content": 0.00022310127678792924, "timestamp": "2025-09-10 02:26:55.784449", "step": 5270, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:26:55.814738", "step": 5270, "epoch": 3 }, { "type": "loss", "content": 0.000734326837118715, "timestamp": "2025-09-10 02:26:55.818917", "step": 5271, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-10 02:26:55.854007", "step": 5271, "epoch": 3 }, { "type": "loss", "content": 0.0004884271766059101, "timestamp": "2025-09-10 02:26:55.888955", "step": 5272, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:26:55.920093", "step": 5272, "epoch": 3 }, { "type": "loss", "content": 0.0021002234425395727, "timestamp": "2025-09-10 02:26:55.922221", "step": 5273, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:26:55.952507", "step": 5273, "epoch": 3 }, { "type": "loss", "content": 4.44793731730897e-05, "timestamp": "2025-09-10 02:26:55.956751", "step": 5274, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:26:55.987419", "step": 5274, "epoch": 3 }, { "type": "loss", "content": 9.469302312936634e-05, "timestamp": "2025-09-10 02:26:55.994191", "step": 5275, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:26:56.026153", "step": 5275, "epoch": 3 }, { "type": "loss", "content": 0.0009514411212876439, "timestamp": "2025-09-10 02:26:56.058722", "step": 5276, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-10 02:26:56.090585", "step": 5276, "epoch": 3 }, { "type": "loss", "content": 6.029165888321586e-05, "timestamp": "2025-09-10 02:26:56.103707", "step": 5277, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:26:56.134258", "step": 5277, "epoch": 3 }, { "type": "loss", "content": 0.03702101483941078, "timestamp": "2025-09-10 02:26:56.138049", "step": 5278, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:26:56.169637", "step": 5278, "epoch": 3 }, { "type": "loss", "content": 7.557779463240877e-05, "timestamp": "2025-09-10 02:26:56.172049", "step": 5279, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:26:56.204160", "step": 5279, "epoch": 3 }, { "type": "loss", "content": 0.0008645829511806369, "timestamp": "2025-09-10 02:26:56.235523", "step": 5280, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:26:56.267832", "step": 5280, "epoch": 3 }, { "type": "loss", "content": 0.00013446787488646805, "timestamp": "2025-09-10 02:26:56.271949", "step": 5281, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:26:56.304519", "step": 5281, "epoch": 3 }, { "type": "loss", "content": 0.003033523913472891, "timestamp": "2025-09-10 02:26:56.306908", "step": 5282, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:26:56.337846", "step": 5282, "epoch": 3 }, { "type": "loss", "content": 0.0018720559310168028, "timestamp": "2025-09-10 02:26:56.341602", "step": 5283, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:26:56.373785", "step": 5283, "epoch": 3 }, { "type": "loss", "content": 0.0002793918247334659, "timestamp": "2025-09-10 02:26:56.404155", "step": 5284, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:26:56.438641", "step": 5284, "epoch": 3 }, { "type": "loss", "content": 0.05163096636533737, "timestamp": "2025-09-10 02:26:56.441011", "step": 5285, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:26:56.472033", "step": 5285, "epoch": 3 }, { "type": "loss", "content": 0.009458529762923717, "timestamp": "2025-09-10 02:26:56.479305", "step": 5286, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 464 ], "flops": 13763770790336 }, "timestamp": "2025-09-10 02:26:56.520156", "step": 5286, "epoch": 3 }, { "type": "loss", "content": 0.0008849184960126877, "timestamp": "2025-09-10 02:26:56.537189", "step": 5287, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:26:56.569025", "step": 5287, "epoch": 3 }, { "type": "loss", "content": 0.0013038819888606668, "timestamp": "2025-09-10 02:26:56.602016", "step": 5288, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:26:56.633736", "step": 5288, "epoch": 3 }, { "type": "loss", "content": 0.04861394315958023, "timestamp": "2025-09-10 02:26:56.638075", "step": 5289, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:26:56.668284", "step": 5289, "epoch": 3 }, { "type": "loss", "content": 0.004373115487396717, "timestamp": "2025-09-10 02:26:56.674804", "step": 5290, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:26:56.707345", "step": 5290, "epoch": 3 }, { "type": "loss", "content": 0.00035963160917162895, "timestamp": "2025-09-10 02:26:56.717659", "step": 5291, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:26:56.750783", "step": 5291, "epoch": 3 }, { "type": "loss", "content": 0.00020156674145255238, "timestamp": "2025-09-10 02:26:56.785068", "step": 5292, "epoch": 3 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 736 ], "batch_size": 8, "flops": 14554433988352 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 480 ], "batch_size": 8, "flops": 9492022189824 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 544 ], "batch_size": 8, "flops": 10757625139456 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 624 ], "batch_size": 8, "flops": 12339628826496 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 576 ], "batch_size": 8, "flops": 11390426614272 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 432 ], "batch_size": 8, "flops": 8542819977600 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 432 ], "batch_size": 8, "flops": 8542819977600 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 128 ], "batch_size": 8, "flops": 2531205966848 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 544 ], "batch_size": 8, "flops": 10757625139456 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 656 ], "batch_size": 8, "flops": 12972430301312 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 128 ], "batch_size": 8, "flops": 2531205966848 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 464 ], "batch_size": 8, "flops": 9175621452416 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 560 ], "batch_size": 8, "flops": 11074025876864 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 576 ], "batch_size": 8, "flops": 11390426614272 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 1168 ], "batch_size": 8, "flops": 23097253898368 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 640 ], "batch_size": 8, "flops": 12656029563904 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 6, 208 ], "batch_size": 8, "flops": 4113209653888 } ], "timestamp": "2025-09-10 02:27:06.874113", "step": 5292, "epoch": 3 }, { "type": "pplx", "content": 19403711.464340024, "timestamp": "2025-09-10 02:27:06.877312", "step": 5292, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:27:06.907901", "step": 5292, "epoch": 3 }, { "type": "loss", "content": 0.00023475231137126684, "timestamp": "2025-09-10 02:27:06.909938", "step": 5293, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:27:06.942794", "step": 5293, "epoch": 3 }, { "type": "loss", "content": 0.0001284535537706688, "timestamp": "2025-09-10 02:27:06.952035", "step": 5294, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:27:06.983428", "step": 5294, "epoch": 3 }, { "type": "loss", "content": 0.003995387349277735, "timestamp": "2025-09-10 02:27:06.991032", "step": 5295, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:27:07.022414", "step": 5295, "epoch": 3 }, { "type": "loss", "content": 0.00036020742845721543, "timestamp": "2025-09-10 02:27:07.047476", "step": 5296, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:27:07.079413", "step": 5296, "epoch": 3 }, { "type": "loss", "content": 0.0002324790257262066, "timestamp": "2025-09-10 02:27:07.081576", "step": 5297, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:27:07.113372", "step": 5297, "epoch": 3 }, { "type": "loss", "content": 0.0002664544736035168, "timestamp": "2025-09-10 02:27:07.125753", "step": 5298, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:27:07.158414", "step": 5298, "epoch": 3 }, { "type": "loss", "content": 0.004768196027725935, "timestamp": "2025-09-10 02:27:07.165119", "step": 5299, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:27:07.197411", "step": 5299, "epoch": 3 }, { "type": "loss", "content": 0.0005364773096516728, "timestamp": "2025-09-10 02:27:07.228444", "step": 5300, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:27:07.260805", "step": 5300, "epoch": 3 }, { "type": "loss", "content": 0.005759544670581818, "timestamp": "2025-09-10 02:27:07.264918", "step": 5301, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:27:07.296947", "step": 5301, "epoch": 3 }, { "type": "loss", "content": 0.0015448706690222025, "timestamp": "2025-09-10 02:27:07.304275", "step": 5302, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 816 ], "flops": 24205052762688 }, "timestamp": "2025-09-10 02:27:07.372258", "step": 5302, "epoch": 3 }, { "type": "loss", "content": 9.88330357358791e-05, "timestamp": "2025-09-10 02:27:07.400778", "step": 5303, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:27:07.433304", "step": 5303, "epoch": 3 }, { "type": "loss", "content": 0.0007378348964266479, "timestamp": "2025-09-10 02:27:07.461302", "step": 5304, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:27:07.494345", "step": 5304, "epoch": 3 }, { "type": "loss", "content": 0.00042900207336060703, "timestamp": "2025-09-10 02:27:07.500713", "step": 5305, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:27:07.533228", "step": 5305, "epoch": 3 }, { "type": "loss", "content": 0.0002336119068786502, "timestamp": "2025-09-10 02:27:07.544426", "step": 5306, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:27:07.576992", "step": 5306, "epoch": 3 }, { "type": "loss", "content": 7.151837053243071e-05, "timestamp": "2025-09-10 02:27:07.580801", "step": 5307, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:27:07.612631", "step": 5307, "epoch": 3 }, { "type": "loss", "content": 0.0003323144337628037, "timestamp": "2025-09-10 02:27:07.640830", "step": 5308, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:27:07.671961", "step": 5308, "epoch": 3 }, { "type": "loss", "content": 0.0012035273248329759, "timestamp": "2025-09-10 02:27:07.674138", "step": 5309, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-10 02:27:07.709551", "step": 5309, "epoch": 3 }, { "type": "loss", "content": 0.0002544188464526087, "timestamp": "2025-09-10 02:27:07.723594", "step": 5310, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:27:07.755728", "step": 5310, "epoch": 3 }, { "type": "loss", "content": 0.0003566377272363752, "timestamp": "2025-09-10 02:27:07.762625", "step": 5311, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:27:07.793161", "step": 5311, "epoch": 3 }, { "type": "loss", "content": 0.0005627021309919655, "timestamp": "2025-09-10 02:27:07.821649", "step": 5312, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:27:07.853329", "step": 5312, "epoch": 3 }, { "type": "loss", "content": 0.0019646212458610535, "timestamp": "2025-09-10 02:27:07.858432", "step": 5313, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:27:07.889314", "step": 5313, "epoch": 3 }, { "type": "loss", "content": 0.013626412488520145, "timestamp": "2025-09-10 02:27:07.896849", "step": 5314, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:27:07.927894", "step": 5314, "epoch": 3 }, { "type": "loss", "content": 0.0002811032463796437, "timestamp": "2025-09-10 02:27:07.934716", "step": 5315, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:27:07.968017", "step": 5315, "epoch": 3 }, { "type": "loss", "content": 0.00022139211068861187, "timestamp": "2025-09-10 02:27:07.995554", "step": 5316, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:27:08.029335", "step": 5316, "epoch": 3 }, { "type": "loss", "content": 0.00019662485283333808, "timestamp": "2025-09-10 02:27:08.038048", "step": 5317, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:27:08.069272", "step": 5317, "epoch": 3 }, { "type": "loss", "content": 0.0002974488597828895, "timestamp": "2025-09-10 02:27:08.081644", "step": 5318, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-10 02:27:08.116905", "step": 5318, "epoch": 3 }, { "type": "loss", "content": 0.0016499229241162539, "timestamp": "2025-09-10 02:27:08.130677", "step": 5319, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:27:08.162174", "step": 5319, "epoch": 3 }, { "type": "loss", "content": 0.0001375975989503786, "timestamp": "2025-09-10 02:27:08.189861", "step": 5320, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:27:08.221651", "step": 5320, "epoch": 3 }, { "type": "loss", "content": 0.00034185725962743163, "timestamp": "2025-09-10 02:27:08.229769", "step": 5321, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:27:08.262367", "step": 5321, "epoch": 3 }, { "type": "loss", "content": 4.766142592416145e-05, "timestamp": "2025-09-10 02:27:08.266626", "step": 5322, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:27:08.297298", "step": 5322, "epoch": 3 }, { "type": "loss", "content": 0.00020192751253489405, "timestamp": "2025-09-10 02:27:08.304114", "step": 5323, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:27:08.334710", "step": 5323, "epoch": 3 }, { "type": "loss", "content": 0.00017714654677547514, "timestamp": "2025-09-10 02:27:08.362803", "step": 5324, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:27:08.393965", "step": 5324, "epoch": 3 }, { "type": "loss", "content": 0.0014676746213808656, "timestamp": "2025-09-10 02:27:08.399243", "step": 5325, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:27:08.432061", "step": 5325, "epoch": 3 }, { "type": "loss", "content": 0.0007701607537455857, "timestamp": "2025-09-10 02:27:08.442083", "step": 5326, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:27:08.485836", "step": 5326, "epoch": 3 }, { "type": "loss", "content": 0.0003435301478020847, "timestamp": "2025-09-10 02:27:08.499222", "step": 5327, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:27:08.533247", "step": 5327, "epoch": 3 }, { "type": "loss", "content": 8.765466918703169e-05, "timestamp": "2025-09-10 02:27:08.565826", "step": 5328, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:27:08.596972", "step": 5328, "epoch": 3 }, { "type": "loss", "content": 0.00012318461085669696, "timestamp": "2025-09-10 02:27:08.599247", "step": 5329, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:27:08.633232", "step": 5329, "epoch": 3 }, { "type": "loss", "content": 0.0016483607469126582, "timestamp": "2025-09-10 02:27:08.646899", "step": 5330, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:27:08.677609", "step": 5330, "epoch": 3 }, { "type": "loss", "content": 0.0008386906119994819, "timestamp": "2025-09-10 02:27:08.684996", "step": 5331, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:27:08.718082", "step": 5331, "epoch": 3 }, { "type": "loss", "content": 4.022822031402029e-05, "timestamp": "2025-09-10 02:27:08.746436", "step": 5332, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:27:08.779520", "step": 5332, "epoch": 3 }, { "type": "loss", "content": 0.04045605659484863, "timestamp": "2025-09-10 02:27:08.784700", "step": 5333, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:27:08.821111", "step": 5333, "epoch": 3 }, { "type": "loss", "content": 8.697760495124385e-05, "timestamp": "2025-09-10 02:27:08.830598", "step": 5334, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:27:08.863455", "step": 5334, "epoch": 3 }, { "type": "loss", "content": 0.0010809339582920074, "timestamp": "2025-09-10 02:27:08.874263", "step": 5335, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:27:08.905338", "step": 5335, "epoch": 3 }, { "type": "loss", "content": 4.760713272844441e-05, "timestamp": "2025-09-10 02:27:08.931014", "step": 5336, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:27:08.966761", "step": 5336, "epoch": 3 }, { "type": "loss", "content": 5.413155668065883e-05, "timestamp": "2025-09-10 02:27:08.972108", "step": 5337, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 128 ], "flops": 3797092544000 }, "timestamp": "2025-09-10 02:27:09.005498", "step": 5337, "epoch": 3 }, { "type": "loss", "content": 0.0001805043575586751, "timestamp": "2025-09-10 02:27:09.009564", "step": 5338, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:27:09.043427", "step": 5338, "epoch": 3 }, { "type": "loss", "content": 0.0475006178021431, "timestamp": "2025-09-10 02:27:09.050181", "step": 5339, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:27:09.085556", "step": 5339, "epoch": 3 }, { "type": "loss", "content": 0.0001535638002678752, "timestamp": "2025-09-10 02:27:09.110509", "step": 5340, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:27:09.141774", "step": 5340, "epoch": 3 }, { "type": "loss", "content": 0.0067802309058606625, "timestamp": "2025-09-10 02:27:09.146917", "step": 5341, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:27:09.186897", "step": 5341, "epoch": 3 }, { "type": "loss", "content": 0.02059916779398918, "timestamp": "2025-09-10 02:27:09.195909", "step": 5342, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:27:09.228865", "step": 5342, "epoch": 3 }, { "type": "loss", "content": 0.00022535616881214082, "timestamp": "2025-09-10 02:27:09.235276", "step": 5343, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:27:09.270492", "step": 5343, "epoch": 3 }, { "type": "loss", "content": 0.0002255578147014603, "timestamp": "2025-09-10 02:27:09.301963", "step": 5344, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:27:09.338400", "step": 5344, "epoch": 3 }, { "type": "loss", "content": 8.521532436134294e-05, "timestamp": "2025-09-10 02:27:09.341414", "step": 5345, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:27:09.380109", "step": 5345, "epoch": 3 }, { "type": "loss", "content": 5.905913349124603e-05, "timestamp": "2025-09-10 02:27:09.386648", "step": 5346, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:27:09.421685", "step": 5346, "epoch": 3 }, { "type": "loss", "content": 0.0005463002598844469, "timestamp": "2025-09-10 02:27:09.428072", "step": 5347, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:27:09.461412", "step": 5347, "epoch": 3 }, { "type": "loss", "content": 0.00023405032698065042, "timestamp": "2025-09-10 02:27:09.488946", "step": 5348, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:27:09.524728", "step": 5348, "epoch": 3 }, { "type": "loss", "content": 0.0009855531388893723, "timestamp": "2025-09-10 02:27:09.528887", "step": 5349, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:27:09.560327", "step": 5349, "epoch": 3 }, { "type": "loss", "content": 0.0001291104854317382, "timestamp": "2025-09-10 02:27:09.572055", "step": 5350, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:27:09.603665", "step": 5350, "epoch": 3 }, { "type": "loss", "content": 0.0029408042319118977, "timestamp": "2025-09-10 02:27:09.610845", "step": 5351, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:27:09.641288", "step": 5351, "epoch": 3 }, { "type": "loss", "content": 0.007272699382156134, "timestamp": "2025-09-10 02:27:09.665079", "step": 5352, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:27:09.696094", "step": 5352, "epoch": 3 }, { "type": "loss", "content": 0.00021549421944655478, "timestamp": "2025-09-10 02:27:09.700554", "step": 5353, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:27:09.731962", "step": 5353, "epoch": 3 }, { "type": "loss", "content": 0.00029168991022743285, "timestamp": "2025-09-10 02:27:09.744166", "step": 5354, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:27:09.777081", "step": 5354, "epoch": 3 }, { "type": "loss", "content": 0.0001440942141925916, "timestamp": "2025-09-10 02:27:09.789247", "step": 5355, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:27:09.820693", "step": 5355, "epoch": 3 }, { "type": "loss", "content": 0.0006720181554555893, "timestamp": "2025-09-10 02:27:09.848205", "step": 5356, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:27:09.881714", "step": 5356, "epoch": 3 }, { "type": "loss", "content": 0.0011303488863632083, "timestamp": "2025-09-10 02:27:09.886340", "step": 5357, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:27:09.918098", "step": 5357, "epoch": 3 }, { "type": "loss", "content": 0.0002597134152892977, "timestamp": "2025-09-10 02:27:09.929734", "step": 5358, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:27:09.963169", "step": 5358, "epoch": 3 }, { "type": "loss", "content": 0.00024505850160494447, "timestamp": "2025-09-10 02:27:09.967403", "step": 5359, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:27:09.998959", "step": 5359, "epoch": 3 }, { "type": "loss", "content": 0.0003296424984000623, "timestamp": "2025-09-10 02:27:10.024044", "step": 5360, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:27:10.055241", "step": 5360, "epoch": 3 }, { "type": "loss", "content": 0.0009543310734443367, "timestamp": "2025-09-10 02:27:10.057851", "step": 5361, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:27:10.091168", "step": 5361, "epoch": 3 }, { "type": "loss", "content": 0.00012925105693284422, "timestamp": "2025-09-10 02:27:10.098638", "step": 5362, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:27:10.130346", "step": 5362, "epoch": 3 }, { "type": "loss", "content": 0.00022506927780341357, "timestamp": "2025-09-10 02:27:10.137015", "step": 5363, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-10 02:27:10.175004", "step": 5363, "epoch": 3 }, { "type": "loss", "content": 0.00027559816953726113, "timestamp": "2025-09-10 02:27:10.211508", "step": 5364, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:27:10.243345", "step": 5364, "epoch": 3 }, { "type": "loss", "content": 7.342889148276299e-05, "timestamp": "2025-09-10 02:27:10.247674", "step": 5365, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:27:10.279417", "step": 5365, "epoch": 3 }, { "type": "loss", "content": 0.0005296029266901314, "timestamp": "2025-09-10 02:27:10.286937", "step": 5366, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-10 02:27:10.321567", "step": 5366, "epoch": 3 }, { "type": "loss", "content": 0.00015757219807710499, "timestamp": "2025-09-10 02:27:10.335385", "step": 5367, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:27:10.367196", "step": 5367, "epoch": 3 }, { "type": "loss", "content": 0.003111243713647127, "timestamp": "2025-09-10 02:27:10.394588", "step": 5368, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:27:10.427238", "step": 5368, "epoch": 3 }, { "type": "loss", "content": 0.0011776711326092482, "timestamp": "2025-09-10 02:27:10.433995", "step": 5369, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:27:10.465234", "step": 5369, "epoch": 3 }, { "type": "loss", "content": 0.00025284269941039383, "timestamp": "2025-09-10 02:27:10.477006", "step": 5370, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:27:10.508081", "step": 5370, "epoch": 3 }, { "type": "loss", "content": 0.0006649411516264081, "timestamp": "2025-09-10 02:27:10.520422", "step": 5371, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:27:10.551453", "step": 5371, "epoch": 3 }, { "type": "loss", "content": 0.001548093743622303, "timestamp": "2025-09-10 02:27:10.579634", "step": 5372, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:27:10.611150", "step": 5372, "epoch": 3 }, { "type": "loss", "content": 0.0003271247842349112, "timestamp": "2025-09-10 02:27:10.615775", "step": 5373, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:27:10.647350", "step": 5373, "epoch": 3 }, { "type": "loss", "content": 0.005300581920892, "timestamp": "2025-09-10 02:27:10.654716", "step": 5374, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:27:10.687559", "step": 5374, "epoch": 3 }, { "type": "loss", "content": 0.0005442704423330724, "timestamp": "2025-09-10 02:27:10.694191", "step": 5375, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:27:10.727002", "step": 5375, "epoch": 3 }, { "type": "loss", "content": 0.0001799498131731525, "timestamp": "2025-09-10 02:27:10.754945", "step": 5376, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:27:10.788594", "step": 5376, "epoch": 3 }, { "type": "loss", "content": 0.0002708194369915873, "timestamp": "2025-09-10 02:27:10.793486", "step": 5377, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:27:10.825847", "step": 5377, "epoch": 3 }, { "type": "loss", "content": 0.004976264201104641, "timestamp": "2025-09-10 02:27:10.837370", "step": 5378, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:27:10.868671", "step": 5378, "epoch": 3 }, { "type": "loss", "content": 0.00040135084418579936, "timestamp": "2025-09-10 02:27:10.875444", "step": 5379, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:27:10.909459", "step": 5379, "epoch": 3 }, { "type": "loss", "content": 0.0001543848484288901, "timestamp": "2025-09-10 02:27:10.934664", "step": 5380, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:27:10.967418", "step": 5380, "epoch": 3 }, { "type": "loss", "content": 0.021023396402597427, "timestamp": "2025-09-10 02:27:10.974542", "step": 5381, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:27:11.006146", "step": 5381, "epoch": 3 }, { "type": "loss", "content": 0.0035028725396841764, "timestamp": "2025-09-10 02:27:11.009911", "step": 5382, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:27:11.041403", "step": 5382, "epoch": 3 }, { "type": "loss", "content": 0.0032905121333897114, "timestamp": "2025-09-10 02:27:11.048083", "step": 5383, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:27:11.079922", "step": 5383, "epoch": 3 }, { "type": "loss", "content": 0.0008346849936060607, "timestamp": "2025-09-10 02:27:11.108219", "step": 5384, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:27:11.139874", "step": 5384, "epoch": 3 }, { "type": "loss", "content": 0.00020959800167474896, "timestamp": "2025-09-10 02:27:11.144722", "step": 5385, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:27:11.176522", "step": 5385, "epoch": 3 }, { "type": "loss", "content": 8.455058559775352e-05, "timestamp": "2025-09-10 02:27:11.186299", "step": 5386, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:27:11.217775", "step": 5386, "epoch": 3 }, { "type": "loss", "content": 0.00021998195734340698, "timestamp": "2025-09-10 02:27:11.224526", "step": 5387, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 432 ], "flops": 12814563338304 }, "timestamp": "2025-09-10 02:27:11.264673", "step": 5387, "epoch": 3 }, { "type": "loss", "content": 0.021652230992913246, "timestamp": "2025-09-10 02:27:11.301731", "step": 5388, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:27:11.334326", "step": 5388, "epoch": 3 }, { "type": "loss", "content": 0.0016941409558057785, "timestamp": "2025-09-10 02:27:11.338505", "step": 5389, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:27:11.369358", "step": 5389, "epoch": 3 }, { "type": "loss", "content": 0.0002850510645657778, "timestamp": "2025-09-10 02:27:11.372473", "step": 5390, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:27:11.403900", "step": 5390, "epoch": 3 }, { "type": "loss", "content": 0.0029228352941572666, "timestamp": "2025-09-10 02:27:11.410340", "step": 5391, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:27:11.441958", "step": 5391, "epoch": 3 }, { "type": "loss", "content": 0.01626124419271946, "timestamp": "2025-09-10 02:27:11.469556", "step": 5392, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:27:11.503536", "step": 5392, "epoch": 3 }, { "type": "loss", "content": 0.001065724529325962, "timestamp": "2025-09-10 02:27:11.512032", "step": 5393, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:27:11.549882", "step": 5393, "epoch": 3 }, { "type": "loss", "content": 0.0003987684322055429, "timestamp": "2025-09-10 02:27:11.563574", "step": 5394, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:27:11.595440", "step": 5394, "epoch": 3 }, { "type": "loss", "content": 0.0007257405668497086, "timestamp": "2025-09-10 02:27:11.599672", "step": 5395, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:27:11.631068", "step": 5395, "epoch": 3 }, { "type": "loss", "content": 0.003991567995399237, "timestamp": "2025-09-10 02:27:11.658897", "step": 5396, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:27:11.689820", "step": 5396, "epoch": 3 }, { "type": "loss", "content": 0.001748523791320622, "timestamp": "2025-09-10 02:27:11.695212", "step": 5397, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:27:11.728539", "step": 5397, "epoch": 3 }, { "type": "loss", "content": 0.0017363729421049356, "timestamp": "2025-09-10 02:27:11.735372", "step": 5398, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-10 02:27:11.774608", "step": 5398, "epoch": 3 }, { "type": "loss", "content": 0.0008585135801695287, "timestamp": "2025-09-10 02:27:11.790251", "step": 5399, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:27:11.821849", "step": 5399, "epoch": 3 }, { "type": "loss", "content": 0.00017365036183036864, "timestamp": "2025-09-10 02:27:11.850439", "step": 5400, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:27:11.881998", "step": 5400, "epoch": 3 }, { "type": "loss", "content": 0.0003011637891177088, "timestamp": "2025-09-10 02:27:11.886504", "step": 5401, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:27:11.917983", "step": 5401, "epoch": 3 }, { "type": "loss", "content": 0.03174243122339249, "timestamp": "2025-09-10 02:27:11.925005", "step": 5402, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:27:11.956591", "step": 5402, "epoch": 3 }, { "type": "loss", "content": 0.00016650068573653698, "timestamp": "2025-09-10 02:27:11.963318", "step": 5403, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:27:11.995075", "step": 5403, "epoch": 3 }, { "type": "loss", "content": 0.0002632165269460529, "timestamp": "2025-09-10 02:27:12.027740", "step": 5404, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:27:12.059862", "step": 5404, "epoch": 3 }, { "type": "loss", "content": 7.19372255844064e-05, "timestamp": "2025-09-10 02:27:12.063799", "step": 5405, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:27:12.096073", "step": 5405, "epoch": 3 }, { "type": "loss", "content": 0.0001446278765797615, "timestamp": "2025-09-10 02:27:12.102727", "step": 5406, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:27:12.134862", "step": 5406, "epoch": 3 }, { "type": "loss", "content": 0.0006326402653940022, "timestamp": "2025-09-10 02:27:12.141586", "step": 5407, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:27:12.174294", "step": 5407, "epoch": 3 }, { "type": "loss", "content": 9.538559970678762e-05, "timestamp": "2025-09-10 02:27:12.202505", "step": 5408, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:27:12.234243", "step": 5408, "epoch": 3 }, { "type": "loss", "content": 0.002074205782264471, "timestamp": "2025-09-10 02:27:12.238921", "step": 5409, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 432 ], "flops": 12814563338304 }, "timestamp": "2025-09-10 02:27:12.278081", "step": 5409, "epoch": 3 }, { "type": "loss", "content": 0.004352888558059931, "timestamp": "2025-09-10 02:27:12.294251", "step": 5410, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:27:12.325875", "step": 5410, "epoch": 3 }, { "type": "loss", "content": 0.0003916619752999395, "timestamp": "2025-09-10 02:27:12.336292", "step": 5411, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:27:12.367817", "step": 5411, "epoch": 3 }, { "type": "loss", "content": 6.0024420236004516e-05, "timestamp": "2025-09-10 02:27:12.395910", "step": 5412, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:27:12.427527", "step": 5412, "epoch": 3 }, { "type": "loss", "content": 0.0003417480329517275, "timestamp": "2025-09-10 02:27:12.432346", "step": 5413, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:27:12.464295", "step": 5413, "epoch": 3 }, { "type": "loss", "content": 0.04961821064352989, "timestamp": "2025-09-10 02:27:12.470898", "step": 5414, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:27:12.503543", "step": 5414, "epoch": 3 }, { "type": "loss", "content": 0.000651273294351995, "timestamp": "2025-09-10 02:27:12.515505", "step": 5415, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:27:12.547695", "step": 5415, "epoch": 3 }, { "type": "loss", "content": 0.0005461532273329794, "timestamp": "2025-09-10 02:27:12.574876", "step": 5416, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:27:12.609568", "step": 5416, "epoch": 3 }, { "type": "loss", "content": 0.00011130100028822199, "timestamp": "2025-09-10 02:27:12.617143", "step": 5417, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:27:12.648604", "step": 5417, "epoch": 3 }, { "type": "loss", "content": 0.004985901992768049, "timestamp": "2025-09-10 02:27:12.655668", "step": 5418, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:27:12.687344", "step": 5418, "epoch": 3 }, { "type": "loss", "content": 0.00047179448301903903, "timestamp": "2025-09-10 02:27:12.699601", "step": 5419, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:27:12.732923", "step": 5419, "epoch": 3 }, { "type": "loss", "content": 0.04157213494181633, "timestamp": "2025-09-10 02:27:12.757955", "step": 5420, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:27:12.789802", "step": 5420, "epoch": 3 }, { "type": "loss", "content": 0.000143712037242949, "timestamp": "2025-09-10 02:27:12.799045", "step": 5421, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:27:12.832584", "step": 5421, "epoch": 3 }, { "type": "loss", "content": 0.0028550736606121063, "timestamp": "2025-09-10 02:27:12.845910", "step": 5422, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:27:12.878184", "step": 5422, "epoch": 3 }, { "type": "loss", "content": 0.0014410755829885602, "timestamp": "2025-09-10 02:27:12.885163", "step": 5423, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:27:12.917557", "step": 5423, "epoch": 3 }, { "type": "loss", "content": 0.00013774879334960133, "timestamp": "2025-09-10 02:27:12.941980", "step": 5424, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:27:12.974130", "step": 5424, "epoch": 3 }, { "type": "loss", "content": 0.0003147267270833254, "timestamp": "2025-09-10 02:27:12.986753", "step": 5425, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 144 ], "flops": 4271696270016 }, "timestamp": "2025-09-10 02:27:13.019181", "step": 5425, "epoch": 3 }, { "type": "loss", "content": 0.0036901801358908415, "timestamp": "2025-09-10 02:27:13.021451", "step": 5426, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:27:13.052834", "step": 5426, "epoch": 3 }, { "type": "loss", "content": 9.131115803029388e-05, "timestamp": "2025-09-10 02:27:13.062813", "step": 5427, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:27:13.094571", "step": 5427, "epoch": 3 }, { "type": "loss", "content": 0.00034638322540558875, "timestamp": "2025-09-10 02:27:13.121982", "step": 5428, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:27:13.152789", "step": 5428, "epoch": 3 }, { "type": "loss", "content": 0.0003097867302130908, "timestamp": "2025-09-10 02:27:13.157434", "step": 5429, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:27:13.188718", "step": 5429, "epoch": 3 }, { "type": "loss", "content": 0.0006205638055689633, "timestamp": "2025-09-10 02:27:13.198938", "step": 5430, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:27:13.234382", "step": 5430, "epoch": 3 }, { "type": "loss", "content": 0.004306466784328222, "timestamp": "2025-09-10 02:27:13.248054", "step": 5431, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:27:13.280356", "step": 5431, "epoch": 3 }, { "type": "loss", "content": 0.0013333893148228526, "timestamp": "2025-09-10 02:27:13.311264", "step": 5432, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-10 02:27:13.348541", "step": 5432, "epoch": 3 }, { "type": "loss", "content": 0.003753043944016099, "timestamp": "2025-09-10 02:27:13.363655", "step": 5433, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:27:13.395676", "step": 5433, "epoch": 3 }, { "type": "loss", "content": 0.00018160228501074016, "timestamp": "2025-09-10 02:27:13.406258", "step": 5434, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:27:13.437458", "step": 5434, "epoch": 3 }, { "type": "loss", "content": 0.0016118159983307123, "timestamp": "2025-09-10 02:27:13.444359", "step": 5435, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:27:13.475084", "step": 5435, "epoch": 3 }, { "type": "loss", "content": 0.00042566441697999835, "timestamp": "2025-09-10 02:27:13.506907", "step": 5436, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:27:13.538508", "step": 5436, "epoch": 3 }, { "type": "loss", "content": 0.0005374281900003552, "timestamp": "2025-09-10 02:27:13.542905", "step": 5437, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:27:13.573793", "step": 5437, "epoch": 3 }, { "type": "loss", "content": 0.0007307881605811417, "timestamp": "2025-09-10 02:27:13.580832", "step": 5438, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:27:13.611475", "step": 5438, "epoch": 3 }, { "type": "loss", "content": 0.0004055744793731719, "timestamp": "2025-09-10 02:27:13.615784", "step": 5439, "epoch": 3 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 736 ], "batch_size": 8, "flops": 14554433988352 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 480 ], "batch_size": 8, "flops": 9492022189824 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 544 ], "batch_size": 8, "flops": 10757625139456 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 624 ], "batch_size": 8, "flops": 12339628826496 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 576 ], "batch_size": 8, "flops": 11390426614272 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 432 ], "batch_size": 8, "flops": 8542819977600 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 432 ], "batch_size": 8, "flops": 8542819977600 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 128 ], "batch_size": 8, "flops": 2531205966848 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 544 ], "batch_size": 8, "flops": 10757625139456 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 656 ], "batch_size": 8, "flops": 12972430301312 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 128 ], "batch_size": 8, "flops": 2531205966848 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 464 ], "batch_size": 8, "flops": 9175621452416 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 560 ], "batch_size": 8, "flops": 11074025876864 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 576 ], "batch_size": 8, "flops": 11390426614272 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 1168 ], "batch_size": 8, "flops": 23097253898368 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 640 ], "batch_size": 8, "flops": 12656029563904 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 6, 208 ], "batch_size": 8, "flops": 4113209653888 } ], "timestamp": "2025-09-10 02:27:23.615052", "step": 5439, "epoch": 3 }, { "type": "pplx", "content": 20426999.100602426, "timestamp": "2025-09-10 02:27:23.619545", "step": 5439, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:27:23.651955", "step": 5439, "epoch": 3 }, { "type": "loss", "content": 0.00016575765039306134, "timestamp": "2025-09-10 02:27:23.676033", "step": 5440, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:27:23.705982", "step": 5440, "epoch": 3 }, { "type": "loss", "content": 0.00043351706699468195, "timestamp": "2025-09-10 02:27:23.707867", "step": 5441, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:27:23.736882", "step": 5441, "epoch": 3 }, { "type": "loss", "content": 0.0011763531947508454, "timestamp": "2025-09-10 02:27:23.741399", "step": 5442, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-10 02:27:23.779772", "step": 5442, "epoch": 3 }, { "type": "loss", "content": 0.0002184472105000168, "timestamp": "2025-09-10 02:27:23.795688", "step": 5443, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:27:23.826560", "step": 5443, "epoch": 3 }, { "type": "loss", "content": 0.006947829853743315, "timestamp": "2025-09-10 02:27:23.853998", "step": 5444, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:27:23.884265", "step": 5444, "epoch": 3 }, { "type": "loss", "content": 0.00013986548583488911, "timestamp": "2025-09-10 02:27:23.894618", "step": 5445, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:27:23.925075", "step": 5445, "epoch": 3 }, { "type": "loss", "content": 0.001293918932788074, "timestamp": "2025-09-10 02:27:23.932472", "step": 5446, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:27:23.962245", "step": 5446, "epoch": 3 }, { "type": "loss", "content": 0.0006181861972436309, "timestamp": "2025-09-10 02:27:23.973063", "step": 5447, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:27:24.002436", "step": 5447, "epoch": 3 }, { "type": "loss", "content": 0.000459133880212903, "timestamp": "2025-09-10 02:27:24.027256", "step": 5448, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:27:24.057967", "step": 5448, "epoch": 3 }, { "type": "loss", "content": 0.009505641646683216, "timestamp": "2025-09-10 02:27:24.062630", "step": 5449, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:27:24.095581", "step": 5449, "epoch": 3 }, { "type": "loss", "content": 0.0008678924641571939, "timestamp": "2025-09-10 02:27:24.108930", "step": 5450, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-10 02:27:24.147699", "step": 5450, "epoch": 3 }, { "type": "loss", "content": 0.0002460898831486702, "timestamp": "2025-09-10 02:27:24.163384", "step": 5451, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:27:24.193907", "step": 5451, "epoch": 3 }, { "type": "loss", "content": 0.0009736836655065417, "timestamp": "2025-09-10 02:27:24.222218", "step": 5452, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:27:24.251808", "step": 5452, "epoch": 3 }, { "type": "loss", "content": 0.0001476912439102307, "timestamp": "2025-09-10 02:27:24.257124", "step": 5453, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 144 ], "flops": 4271696270016 }, "timestamp": "2025-09-10 02:27:24.287170", "step": 5453, "epoch": 3 }, { "type": "loss", "content": 0.00234299642033875, "timestamp": "2025-09-10 02:27:24.289140", "step": 5454, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:27:24.319252", "step": 5454, "epoch": 3 }, { "type": "loss", "content": 0.0003727012954186648, "timestamp": "2025-09-10 02:27:24.323506", "step": 5455, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:27:24.353122", "step": 5455, "epoch": 3 }, { "type": "loss", "content": 0.00016866849910002202, "timestamp": "2025-09-10 02:27:24.380665", "step": 5456, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:27:24.411019", "step": 5456, "epoch": 3 }, { "type": "loss", "content": 0.00030947296181693673, "timestamp": "2025-09-10 02:27:24.413046", "step": 5457, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:27:24.443865", "step": 5457, "epoch": 3 }, { "type": "loss", "content": 0.0004413680580910295, "timestamp": "2025-09-10 02:27:24.448059", "step": 5458, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:27:24.481001", "step": 5458, "epoch": 3 }, { "type": "loss", "content": 0.00032807476236484945, "timestamp": "2025-09-10 02:27:24.485453", "step": 5459, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:27:24.515733", "step": 5459, "epoch": 3 }, { "type": "loss", "content": 0.00025154382456094027, "timestamp": "2025-09-10 02:27:24.549197", "step": 5460, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:27:24.585862", "step": 5460, "epoch": 3 }, { "type": "loss", "content": 0.001103718881495297, "timestamp": "2025-09-10 02:27:24.590640", "step": 5461, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:27:24.621484", "step": 5461, "epoch": 3 }, { "type": "loss", "content": 0.0004202440322842449, "timestamp": "2025-09-10 02:27:24.631686", "step": 5462, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:27:24.662057", "step": 5462, "epoch": 3 }, { "type": "loss", "content": 0.0002868052397388965, "timestamp": "2025-09-10 02:27:24.669186", "step": 5463, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:27:24.704523", "step": 5463, "epoch": 3 }, { "type": "loss", "content": 0.0005337927141226828, "timestamp": "2025-09-10 02:27:24.738823", "step": 5464, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:27:24.773003", "step": 5464, "epoch": 3 }, { "type": "loss", "content": 0.000878959137480706, "timestamp": "2025-09-10 02:27:24.777704", "step": 5465, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:27:24.807633", "step": 5465, "epoch": 3 }, { "type": "loss", "content": 0.00041015897295437753, "timestamp": "2025-09-10 02:27:24.814729", "step": 5466, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:27:24.845250", "step": 5466, "epoch": 3 }, { "type": "loss", "content": 0.008768526837229729, "timestamp": "2025-09-10 02:27:24.856091", "step": 5467, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:27:24.886823", "step": 5467, "epoch": 3 }, { "type": "loss", "content": 0.00015605808584950864, "timestamp": "2025-09-10 02:27:24.918482", "step": 5468, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:27:24.948828", "step": 5468, "epoch": 3 }, { "type": "loss", "content": 0.00048326136311516166, "timestamp": "2025-09-10 02:27:24.954048", "step": 5469, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:27:24.983806", "step": 5469, "epoch": 3 }, { "type": "loss", "content": 0.0007095988839864731, "timestamp": "2025-09-10 02:27:24.988163", "step": 5470, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:27:25.018745", "step": 5470, "epoch": 3 }, { "type": "loss", "content": 0.0030129605438560247, "timestamp": "2025-09-10 02:27:25.026198", "step": 5471, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 480 ], "flops": 14238374516352 }, "timestamp": "2025-09-10 02:27:25.068239", "step": 5471, "epoch": 3 }, { "type": "loss", "content": 0.008913605473935604, "timestamp": "2025-09-10 02:27:25.106488", "step": 5472, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:27:25.137125", "step": 5472, "epoch": 3 }, { "type": "loss", "content": 0.0003532721020746976, "timestamp": "2025-09-10 02:27:25.147566", "step": 5473, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:27:25.177878", "step": 5473, "epoch": 3 }, { "type": "loss", "content": 0.0001848796382546425, "timestamp": "2025-09-10 02:27:25.184808", "step": 5474, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:27:25.214781", "step": 5474, "epoch": 3 }, { "type": "loss", "content": 0.0004907096736133099, "timestamp": "2025-09-10 02:27:25.221615", "step": 5475, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:27:25.252159", "step": 5475, "epoch": 3 }, { "type": "loss", "content": 0.0004714152601081878, "timestamp": "2025-09-10 02:27:25.285615", "step": 5476, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-10 02:27:25.318176", "step": 5476, "epoch": 3 }, { "type": "loss", "content": 0.0014040955575183034, "timestamp": "2025-09-10 02:27:25.331456", "step": 5477, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:27:25.361847", "step": 5477, "epoch": 3 }, { "type": "loss", "content": 0.0022547480184584856, "timestamp": "2025-09-10 02:27:25.368579", "step": 5478, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:27:25.399396", "step": 5478, "epoch": 3 }, { "type": "loss", "content": 0.0004808087833225727, "timestamp": "2025-09-10 02:27:25.406197", "step": 5479, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:27:25.436828", "step": 5479, "epoch": 3 }, { "type": "loss", "content": 0.00024939377908594906, "timestamp": "2025-09-10 02:27:25.464613", "step": 5480, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-10 02:27:25.500516", "step": 5480, "epoch": 3 }, { "type": "loss", "content": 0.0003376358072273433, "timestamp": "2025-09-10 02:27:25.515696", "step": 5481, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:27:25.545597", "step": 5481, "epoch": 3 }, { "type": "loss", "content": 0.00017180813301820308, "timestamp": "2025-09-10 02:27:25.552497", "step": 5482, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:27:25.592196", "step": 5482, "epoch": 3 }, { "type": "loss", "content": 0.00040549953700974584, "timestamp": "2025-09-10 02:27:25.594957", "step": 5483, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:27:25.625545", "step": 5483, "epoch": 3 }, { "type": "loss", "content": 0.0006368904723785818, "timestamp": "2025-09-10 02:27:25.658634", "step": 5484, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:27:25.689629", "step": 5484, "epoch": 3 }, { "type": "loss", "content": 0.00018317217472940683, "timestamp": "2025-09-10 02:27:25.694781", "step": 5485, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:27:25.724875", "step": 5485, "epoch": 3 }, { "type": "loss", "content": 0.0001097510103136301, "timestamp": "2025-09-10 02:27:25.728890", "step": 5486, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:27:25.760354", "step": 5486, "epoch": 3 }, { "type": "loss", "content": 0.0001433340657968074, "timestamp": "2025-09-10 02:27:25.772468", "step": 5487, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:27:25.802978", "step": 5487, "epoch": 3 }, { "type": "loss", "content": 0.0005603828467428684, "timestamp": "2025-09-10 02:27:25.831313", "step": 5488, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:27:25.861933", "step": 5488, "epoch": 3 }, { "type": "loss", "content": 0.0009804172441363335, "timestamp": "2025-09-10 02:27:25.867054", "step": 5489, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:27:25.896960", "step": 5489, "epoch": 3 }, { "type": "loss", "content": 0.029433060437440872, "timestamp": "2025-09-10 02:27:25.901378", "step": 5490, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:27:25.932233", "step": 5490, "epoch": 3 }, { "type": "loss", "content": 0.0031838512513786554, "timestamp": "2025-09-10 02:27:25.938904", "step": 5491, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:27:25.971123", "step": 5491, "epoch": 3 }, { "type": "loss", "content": 0.0012118567246943712, "timestamp": "2025-09-10 02:27:26.003044", "step": 5492, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:27:26.033688", "step": 5492, "epoch": 3 }, { "type": "loss", "content": 7.553322211606428e-05, "timestamp": "2025-09-10 02:27:26.038348", "step": 5493, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:27:26.068370", "step": 5493, "epoch": 3 }, { "type": "loss", "content": 0.0006994387367740273, "timestamp": "2025-09-10 02:27:26.075270", "step": 5494, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:27:26.105413", "step": 5494, "epoch": 3 }, { "type": "loss", "content": 0.00013065806706435978, "timestamp": "2025-09-10 02:27:26.113048", "step": 5495, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:27:26.147661", "step": 5495, "epoch": 3 }, { "type": "loss", "content": 0.0004590437456499785, "timestamp": "2025-09-10 02:27:26.182277", "step": 5496, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:27:26.214107", "step": 5496, "epoch": 3 }, { "type": "loss", "content": 0.00011019222438335419, "timestamp": "2025-09-10 02:27:26.218755", "step": 5497, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-10 02:27:26.256942", "step": 5497, "epoch": 3 }, { "type": "loss", "content": 0.0003960966714657843, "timestamp": "2025-09-10 02:27:26.272845", "step": 5498, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:27:26.303273", "step": 5498, "epoch": 3 }, { "type": "loss", "content": 0.0002474244683980942, "timestamp": "2025-09-10 02:27:26.315523", "step": 5499, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-10 02:27:26.352576", "step": 5499, "epoch": 3 }, { "type": "loss", "content": 0.017456304281949997, "timestamp": "2025-09-10 02:27:26.389120", "step": 5500, "epoch": 3 }, { "type": "info", "content": "Checkpoint saved at step 5500", "timestamp": "2025-09-10 02:27:31.536850", "step": 5500, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:27:31.579077", "step": 5500, "epoch": 3 }, { "type": "loss", "content": 0.002106861909851432, "timestamp": "2025-09-10 02:27:31.582270", "step": 5501, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-10 02:27:31.629193", "step": 5501, "epoch": 3 }, { "type": "loss", "content": 0.00021317604114301503, "timestamp": "2025-09-10 02:27:31.642886", "step": 5502, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:27:31.674071", "step": 5502, "epoch": 3 }, { "type": "loss", "content": 0.00021606599329970777, "timestamp": "2025-09-10 02:27:31.680617", "step": 5503, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:27:31.710421", "step": 5503, "epoch": 3 }, { "type": "loss", "content": 0.0007330483640544116, "timestamp": "2025-09-10 02:27:31.737988", "step": 5504, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 528 ], "flops": 15662185694400 }, "timestamp": "2025-09-10 02:27:31.781738", "step": 5504, "epoch": 3 }, { "type": "loss", "content": 0.000325193686876446, "timestamp": "2025-09-10 02:27:31.800720", "step": 5505, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:27:31.833182", "step": 5505, "epoch": 3 }, { "type": "loss", "content": 0.0002977077674586326, "timestamp": "2025-09-10 02:27:31.840308", "step": 5506, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:27:31.871361", "step": 5506, "epoch": 3 }, { "type": "loss", "content": 0.0009078510920517147, "timestamp": "2025-09-10 02:27:31.878302", "step": 5507, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:27:31.909839", "step": 5507, "epoch": 3 }, { "type": "loss", "content": 0.005771205294877291, "timestamp": "2025-09-10 02:27:31.938113", "step": 5508, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:27:31.968598", "step": 5508, "epoch": 3 }, { "type": "loss", "content": 5.6392182159470394e-05, "timestamp": "2025-09-10 02:27:31.972981", "step": 5509, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:27:32.006458", "step": 5509, "epoch": 3 }, { "type": "loss", "content": 0.0001654118241276592, "timestamp": "2025-09-10 02:27:32.019865", "step": 5510, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:27:32.059249", "step": 5510, "epoch": 3 }, { "type": "loss", "content": 0.0002515481901355088, "timestamp": "2025-09-10 02:27:32.072881", "step": 5511, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:27:32.104293", "step": 5511, "epoch": 3 }, { "type": "loss", "content": 0.0001816750009311363, "timestamp": "2025-09-10 02:27:32.132676", "step": 5512, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:27:32.162778", "step": 5512, "epoch": 3 }, { "type": "loss", "content": 0.00022820988669991493, "timestamp": "2025-09-10 02:27:32.164857", "step": 5513, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:27:32.194948", "step": 5513, "epoch": 3 }, { "type": "loss", "content": 0.0005616779671981931, "timestamp": "2025-09-10 02:27:32.197489", "step": 5514, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 144 ], "flops": 4271696270016 }, "timestamp": "2025-09-10 02:27:32.227397", "step": 5514, "epoch": 3 }, { "type": "loss", "content": 7.715394895058125e-05, "timestamp": "2025-09-10 02:27:32.229522", "step": 5515, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:27:32.259843", "step": 5515, "epoch": 3 }, { "type": "loss", "content": 0.003506176406517625, "timestamp": "2025-09-10 02:27:32.293269", "step": 5516, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:27:32.323168", "step": 5516, "epoch": 3 }, { "type": "loss", "content": 0.0001584869751241058, "timestamp": "2025-09-10 02:27:32.325243", "step": 5517, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:27:32.358806", "step": 5517, "epoch": 3 }, { "type": "loss", "content": 0.0006221079966053367, "timestamp": "2025-09-10 02:27:32.372521", "step": 5518, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:27:32.403453", "step": 5518, "epoch": 3 }, { "type": "loss", "content": 4.248010372975841e-05, "timestamp": "2025-09-10 02:27:32.410382", "step": 5519, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:27:32.440771", "step": 5519, "epoch": 3 }, { "type": "loss", "content": 0.0002929776383098215, "timestamp": "2025-09-10 02:27:32.472429", "step": 5520, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:27:32.506997", "step": 5520, "epoch": 3 }, { "type": "loss", "content": 0.0004986113053746521, "timestamp": "2025-09-10 02:27:32.508976", "step": 5521, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:27:32.539000", "step": 5521, "epoch": 3 }, { "type": "loss", "content": 0.00017840255168266594, "timestamp": "2025-09-10 02:27:32.545758", "step": 5522, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:27:32.578145", "step": 5522, "epoch": 3 }, { "type": "loss", "content": 7.241130515467376e-05, "timestamp": "2025-09-10 02:27:32.584978", "step": 5523, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:27:32.618132", "step": 5523, "epoch": 3 }, { "type": "loss", "content": 0.0008913822821341455, "timestamp": "2025-09-10 02:27:32.641737", "step": 5524, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:27:32.672034", "step": 5524, "epoch": 3 }, { "type": "loss", "content": 0.00020360689086373895, "timestamp": "2025-09-10 02:27:32.677021", "step": 5525, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:27:32.710737", "step": 5525, "epoch": 3 }, { "type": "loss", "content": 0.00012081407476216555, "timestamp": "2025-09-10 02:27:32.724427", "step": 5526, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:27:32.754305", "step": 5526, "epoch": 3 }, { "type": "loss", "content": 0.0008983220905065536, "timestamp": "2025-09-10 02:27:32.761358", "step": 5527, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:27:32.791412", "step": 5527, "epoch": 3 }, { "type": "loss", "content": 0.00018443951557856053, "timestamp": "2025-09-10 02:27:32.816320", "step": 5528, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:27:32.847149", "step": 5528, "epoch": 3 }, { "type": "loss", "content": 0.00016453374701086432, "timestamp": "2025-09-10 02:27:32.855023", "step": 5529, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:27:32.886789", "step": 5529, "epoch": 3 }, { "type": "loss", "content": 0.00020828154811169952, "timestamp": "2025-09-10 02:27:32.897005", "step": 5530, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:27:32.932004", "step": 5530, "epoch": 3 }, { "type": "loss", "content": 0.0003416097315493971, "timestamp": "2025-09-10 02:27:32.945746", "step": 5531, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:27:32.978891", "step": 5531, "epoch": 3 }, { "type": "loss", "content": 0.0005632195970974863, "timestamp": "2025-09-10 02:27:33.013047", "step": 5532, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:27:33.044070", "step": 5532, "epoch": 3 }, { "type": "loss", "content": 0.005745100323110819, "timestamp": "2025-09-10 02:27:33.051515", "step": 5533, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:27:33.084358", "step": 5533, "epoch": 3 }, { "type": "loss", "content": 0.000916880089789629, "timestamp": "2025-09-10 02:27:33.088706", "step": 5534, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:27:33.118543", "step": 5534, "epoch": 3 }, { "type": "loss", "content": 0.00013511795259546489, "timestamp": "2025-09-10 02:27:33.125244", "step": 5535, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:27:33.156478", "step": 5535, "epoch": 3 }, { "type": "loss", "content": 0.011048159562051296, "timestamp": "2025-09-10 02:27:33.184951", "step": 5536, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 128 ], "flops": 3797092544000 }, "timestamp": "2025-09-10 02:27:33.214401", "step": 5536, "epoch": 3 }, { "type": "loss", "content": 0.0008397336350753903, "timestamp": "2025-09-10 02:27:33.216071", "step": 5537, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:27:33.245476", "step": 5537, "epoch": 3 }, { "type": "loss", "content": 0.0011787625262513757, "timestamp": "2025-09-10 02:27:33.249811", "step": 5538, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:27:33.281454", "step": 5538, "epoch": 3 }, { "type": "loss", "content": 0.0012026058975607157, "timestamp": "2025-09-10 02:27:33.291796", "step": 5539, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:27:33.321747", "step": 5539, "epoch": 3 }, { "type": "loss", "content": 0.03373594582080841, "timestamp": "2025-09-10 02:27:33.346776", "step": 5540, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:27:33.376340", "step": 5540, "epoch": 3 }, { "type": "loss", "content": 0.00018737561185844243, "timestamp": "2025-09-10 02:27:33.380012", "step": 5541, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:27:33.414380", "step": 5541, "epoch": 3 }, { "type": "loss", "content": 5.8746190916281193e-05, "timestamp": "2025-09-10 02:27:33.421294", "step": 5542, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:27:33.451445", "step": 5542, "epoch": 3 }, { "type": "loss", "content": 0.03155756741762161, "timestamp": "2025-09-10 02:27:33.463526", "step": 5543, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:27:33.494717", "step": 5543, "epoch": 3 }, { "type": "loss", "content": 0.0006670065922662616, "timestamp": "2025-09-10 02:27:33.523226", "step": 5544, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:27:33.554422", "step": 5544, "epoch": 3 }, { "type": "loss", "content": 0.04387173801660538, "timestamp": "2025-09-10 02:27:33.559135", "step": 5545, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:27:33.593139", "step": 5545, "epoch": 3 }, { "type": "loss", "content": 0.00019743894517887384, "timestamp": "2025-09-10 02:27:33.605635", "step": 5546, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:27:33.639149", "step": 5546, "epoch": 3 }, { "type": "loss", "content": 6.449820648413152e-05, "timestamp": "2025-09-10 02:27:33.648609", "step": 5547, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:27:33.680150", "step": 5547, "epoch": 3 }, { "type": "loss", "content": 0.011188429780304432, "timestamp": "2025-09-10 02:27:33.707910", "step": 5548, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:27:33.738430", "step": 5548, "epoch": 3 }, { "type": "loss", "content": 0.0005150972865521908, "timestamp": "2025-09-10 02:27:33.743746", "step": 5549, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:27:33.778426", "step": 5549, "epoch": 3 }, { "type": "loss", "content": 0.0003233766183257103, "timestamp": "2025-09-10 02:27:33.785737", "step": 5550, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:27:33.819092", "step": 5550, "epoch": 3 }, { "type": "loss", "content": 0.00020095381478313357, "timestamp": "2025-09-10 02:27:33.831112", "step": 5551, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-10 02:27:33.870353", "step": 5551, "epoch": 3 }, { "type": "loss", "content": 0.00017904266132973135, "timestamp": "2025-09-10 02:27:33.907089", "step": 5552, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:27:33.941168", "step": 5552, "epoch": 3 }, { "type": "loss", "content": 3.078414738411084e-05, "timestamp": "2025-09-10 02:27:33.943772", "step": 5553, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:27:33.976793", "step": 5553, "epoch": 3 }, { "type": "loss", "content": 0.0003823090228252113, "timestamp": "2025-09-10 02:27:33.983643", "step": 5554, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 480 ], "flops": 14238374516352 }, "timestamp": "2025-09-10 02:27:34.025232", "step": 5554, "epoch": 3 }, { "type": "loss", "content": 0.0005709293182007968, "timestamp": "2025-09-10 02:27:34.042566", "step": 5555, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:27:34.074986", "step": 5555, "epoch": 3 }, { "type": "loss", "content": 0.00023254666302818805, "timestamp": "2025-09-10 02:27:34.102934", "step": 5556, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:27:34.135109", "step": 5556, "epoch": 3 }, { "type": "loss", "content": 0.00022391592210624367, "timestamp": "2025-09-10 02:27:34.137096", "step": 5557, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:27:34.170741", "step": 5557, "epoch": 3 }, { "type": "loss", "content": 0.015340792946517467, "timestamp": "2025-09-10 02:27:34.178292", "step": 5558, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 432 ], "flops": 12814563338304 }, "timestamp": "2025-09-10 02:27:34.219562", "step": 5558, "epoch": 3 }, { "type": "loss", "content": 0.001337612047791481, "timestamp": "2025-09-10 02:27:34.235720", "step": 5559, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:27:34.272146", "step": 5559, "epoch": 3 }, { "type": "loss", "content": 0.0005941048148088157, "timestamp": "2025-09-10 02:27:34.306749", "step": 5560, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-10 02:27:34.340343", "step": 5560, "epoch": 3 }, { "type": "loss", "content": 0.00025993268354795873, "timestamp": "2025-09-10 02:27:34.353688", "step": 5561, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:27:34.385854", "step": 5561, "epoch": 3 }, { "type": "loss", "content": 0.000905154156498611, "timestamp": "2025-09-10 02:27:34.389899", "step": 5562, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:27:34.428220", "step": 5562, "epoch": 3 }, { "type": "loss", "content": 0.0006523271440528333, "timestamp": "2025-09-10 02:27:34.432679", "step": 5563, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:27:34.474553", "step": 5563, "epoch": 3 }, { "type": "loss", "content": 7.019279291853309e-05, "timestamp": "2025-09-10 02:27:34.506256", "step": 5564, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:27:34.536315", "step": 5564, "epoch": 3 }, { "type": "loss", "content": 5.2420513384277e-05, "timestamp": "2025-09-10 02:27:34.540790", "step": 5565, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:27:34.571997", "step": 5565, "epoch": 3 }, { "type": "loss", "content": 0.00020063482224941254, "timestamp": "2025-09-10 02:27:34.581502", "step": 5566, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 480 ], "flops": 14238374516352 }, "timestamp": "2025-09-10 02:27:34.632019", "step": 5566, "epoch": 3 }, { "type": "loss", "content": 0.0006023825262673199, "timestamp": "2025-09-10 02:27:34.649384", "step": 5567, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-10 02:27:34.689284", "step": 5567, "epoch": 3 }, { "type": "loss", "content": 0.0001301374431932345, "timestamp": "2025-09-10 02:27:34.726061", "step": 5568, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:27:34.758512", "step": 5568, "epoch": 3 }, { "type": "loss", "content": 0.000419637217419222, "timestamp": "2025-09-10 02:27:34.762345", "step": 5569, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:27:34.793133", "step": 5569, "epoch": 3 }, { "type": "loss", "content": 0.00030135762062855065, "timestamp": "2025-09-10 02:27:34.802810", "step": 5570, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:27:34.835555", "step": 5570, "epoch": 3 }, { "type": "loss", "content": 9.938400035025552e-05, "timestamp": "2025-09-10 02:27:34.847463", "step": 5571, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:27:34.878126", "step": 5571, "epoch": 3 }, { "type": "loss", "content": 0.001446371665224433, "timestamp": "2025-09-10 02:27:34.905980", "step": 5572, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:27:34.937476", "step": 5572, "epoch": 3 }, { "type": "loss", "content": 0.0001281161530641839, "timestamp": "2025-09-10 02:27:34.942245", "step": 5573, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-10 02:27:34.976733", "step": 5573, "epoch": 3 }, { "type": "loss", "content": 0.00013364390179049224, "timestamp": "2025-09-10 02:27:34.990743", "step": 5574, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:27:35.024991", "step": 5574, "epoch": 3 }, { "type": "loss", "content": 0.0001239602715941146, "timestamp": "2025-09-10 02:27:35.038337", "step": 5575, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:27:35.069263", "step": 5575, "epoch": 3 }, { "type": "loss", "content": 0.00047443489893339574, "timestamp": "2025-09-10 02:27:35.097251", "step": 5576, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:27:35.127534", "step": 5576, "epoch": 3 }, { "type": "loss", "content": 0.00024012614449020475, "timestamp": "2025-09-10 02:27:35.130325", "step": 5577, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:27:35.160617", "step": 5577, "epoch": 3 }, { "type": "loss", "content": 0.0003875931433867663, "timestamp": "2025-09-10 02:27:35.167435", "step": 5578, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:27:35.197741", "step": 5578, "epoch": 3 }, { "type": "loss", "content": 0.0003180662461090833, "timestamp": "2025-09-10 02:27:35.208661", "step": 5579, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:27:35.239848", "step": 5579, "epoch": 3 }, { "type": "loss", "content": 0.00013746933836955577, "timestamp": "2025-09-10 02:27:35.267537", "step": 5580, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:27:35.298635", "step": 5580, "epoch": 3 }, { "type": "loss", "content": 0.005690049845725298, "timestamp": "2025-09-10 02:27:35.303999", "step": 5581, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:27:35.335409", "step": 5581, "epoch": 3 }, { "type": "loss", "content": 0.00014996285608503968, "timestamp": "2025-09-10 02:27:35.342161", "step": 5582, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:27:35.374179", "step": 5582, "epoch": 3 }, { "type": "loss", "content": 0.00021600407490041107, "timestamp": "2025-09-10 02:27:35.380733", "step": 5583, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:27:35.411851", "step": 5583, "epoch": 3 }, { "type": "loss", "content": 0.0007192405755631626, "timestamp": "2025-09-10 02:27:35.436830", "step": 5584, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:27:35.467895", "step": 5584, "epoch": 3 }, { "type": "loss", "content": 0.0003458319406490773, "timestamp": "2025-09-10 02:27:35.472981", "step": 5585, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:27:35.503975", "step": 5585, "epoch": 3 }, { "type": "loss", "content": 0.0002480298571754247, "timestamp": "2025-09-10 02:27:35.506323", "step": 5586, "epoch": 3 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 736 ], "batch_size": 8, "flops": 14554433988352 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 480 ], "batch_size": 8, "flops": 9492022189824 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 544 ], "batch_size": 8, "flops": 10757625139456 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 624 ], "batch_size": 8, "flops": 12339628826496 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 576 ], "batch_size": 8, "flops": 11390426614272 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 432 ], "batch_size": 8, "flops": 8542819977600 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 432 ], "batch_size": 8, "flops": 8542819977600 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 128 ], "batch_size": 8, "flops": 2531205966848 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 544 ], "batch_size": 8, "flops": 10757625139456 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 656 ], "batch_size": 8, "flops": 12972430301312 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 128 ], "batch_size": 8, "flops": 2531205966848 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 464 ], "batch_size": 8, "flops": 9175621452416 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 560 ], "batch_size": 8, "flops": 11074025876864 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 576 ], "batch_size": 8, "flops": 11390426614272 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 1168 ], "batch_size": 8, "flops": 23097253898368 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 640 ], "batch_size": 8, "flops": 12656029563904 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 6, 208 ], "batch_size": 8, "flops": 4113209653888 } ], "timestamp": "2025-09-10 02:27:45.964600", "step": 5586, "epoch": 3 }, { "type": "pplx", "content": 21153895.963864572, "timestamp": "2025-09-10 02:27:45.968772", "step": 5586, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:27:45.999647", "step": 5586, "epoch": 3 }, { "type": "loss", "content": 0.0005043831770308316, "timestamp": "2025-09-10 02:27:46.005515", "step": 5587, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:27:46.044554", "step": 5587, "epoch": 3 }, { "type": "loss", "content": 0.00019294557569082826, "timestamp": "2025-09-10 02:27:46.078716", "step": 5588, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:27:46.110185", "step": 5588, "epoch": 3 }, { "type": "loss", "content": 0.0006769891479052603, "timestamp": "2025-09-10 02:27:46.114567", "step": 5589, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:27:46.145830", "step": 5589, "epoch": 3 }, { "type": "loss", "content": 0.00010750783985713497, "timestamp": "2025-09-10 02:27:46.153069", "step": 5590, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:27:46.184008", "step": 5590, "epoch": 3 }, { "type": "loss", "content": 0.00029583461582660675, "timestamp": "2025-09-10 02:27:46.188447", "step": 5591, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:27:46.218761", "step": 5591, "epoch": 3 }, { "type": "loss", "content": 0.0015220470959320664, "timestamp": "2025-09-10 02:27:46.244206", "step": 5592, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:27:46.275648", "step": 5592, "epoch": 3 }, { "type": "loss", "content": 0.00018189029651694, "timestamp": "2025-09-10 02:27:46.283544", "step": 5593, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:27:46.313685", "step": 5593, "epoch": 3 }, { "type": "loss", "content": 0.0017844110261648893, "timestamp": "2025-09-10 02:27:46.320745", "step": 5594, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:27:46.352390", "step": 5594, "epoch": 3 }, { "type": "loss", "content": 0.002676573349162936, "timestamp": "2025-09-10 02:27:46.364916", "step": 5595, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:27:46.396379", "step": 5595, "epoch": 3 }, { "type": "loss", "content": 0.0013415786670520902, "timestamp": "2025-09-10 02:27:46.428107", "step": 5596, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:27:46.459638", "step": 5596, "epoch": 3 }, { "type": "loss", "content": 0.0009746703435666859, "timestamp": "2025-09-10 02:27:46.467143", "step": 5597, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:27:46.498227", "step": 5597, "epoch": 3 }, { "type": "loss", "content": 0.0008030079188756645, "timestamp": "2025-09-10 02:27:46.505002", "step": 5598, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:27:46.536683", "step": 5598, "epoch": 3 }, { "type": "loss", "content": 0.012600510381162167, "timestamp": "2025-09-10 02:27:46.546818", "step": 5599, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:27:46.577027", "step": 5599, "epoch": 3 }, { "type": "loss", "content": 0.00023556490486953408, "timestamp": "2025-09-10 02:27:46.605544", "step": 5600, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 784 ], "flops": 23255845310656 }, "timestamp": "2025-09-10 02:27:46.668269", "step": 5600, "epoch": 3 }, { "type": "loss", "content": 0.002329291310161352, "timestamp": "2025-09-10 02:27:46.695192", "step": 5601, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:27:46.727012", "step": 5601, "epoch": 3 }, { "type": "loss", "content": 0.0002381420199526474, "timestamp": "2025-09-10 02:27:46.739574", "step": 5602, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:27:46.770291", "step": 5602, "epoch": 3 }, { "type": "loss", "content": 0.00032722530886530876, "timestamp": "2025-09-10 02:27:46.774173", "step": 5603, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:27:46.805995", "step": 5603, "epoch": 3 }, { "type": "loss", "content": 0.00039383722469210625, "timestamp": "2025-09-10 02:27:46.837715", "step": 5604, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:27:46.869559", "step": 5604, "epoch": 3 }, { "type": "loss", "content": 0.0016721284482628107, "timestamp": "2025-09-10 02:27:46.877324", "step": 5605, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 432 ], "flops": 12814563338304 }, "timestamp": "2025-09-10 02:27:46.917602", "step": 5605, "epoch": 3 }, { "type": "loss", "content": 0.0009820311097428203, "timestamp": "2025-09-10 02:27:46.933729", "step": 5606, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:27:46.968616", "step": 5606, "epoch": 3 }, { "type": "loss", "content": 0.0013964021345600486, "timestamp": "2025-09-10 02:27:46.971092", "step": 5607, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:27:47.001901", "step": 5607, "epoch": 3 }, { "type": "loss", "content": 0.00012165692896815017, "timestamp": "2025-09-10 02:27:47.029468", "step": 5608, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:27:47.060590", "step": 5608, "epoch": 3 }, { "type": "loss", "content": 0.0014244894264265895, "timestamp": "2025-09-10 02:27:47.065971", "step": 5609, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:27:47.097599", "step": 5609, "epoch": 3 }, { "type": "loss", "content": 3.5444008972262964e-05, "timestamp": "2025-09-10 02:27:47.102060", "step": 5610, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:27:47.134406", "step": 5610, "epoch": 3 }, { "type": "loss", "content": 0.020029067993164062, "timestamp": "2025-09-10 02:27:47.141398", "step": 5611, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:27:47.173807", "step": 5611, "epoch": 3 }, { "type": "loss", "content": 0.00018498908320907503, "timestamp": "2025-09-10 02:27:47.198781", "step": 5612, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:27:47.230234", "step": 5612, "epoch": 3 }, { "type": "loss", "content": 0.00031777346157468855, "timestamp": "2025-09-10 02:27:47.235598", "step": 5613, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:27:47.270740", "step": 5613, "epoch": 3 }, { "type": "loss", "content": 0.00015662026999052614, "timestamp": "2025-09-10 02:27:47.278215", "step": 5614, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:27:47.309072", "step": 5614, "epoch": 3 }, { "type": "loss", "content": 0.0004629619943443686, "timestamp": "2025-09-10 02:27:47.312933", "step": 5615, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:27:47.343858", "step": 5615, "epoch": 3 }, { "type": "loss", "content": 9.026808402268216e-05, "timestamp": "2025-09-10 02:27:47.372453", "step": 5616, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:27:47.412614", "step": 5616, "epoch": 3 }, { "type": "loss", "content": 0.00044115257333032787, "timestamp": "2025-09-10 02:27:47.417106", "step": 5617, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-10 02:27:47.459530", "step": 5617, "epoch": 3 }, { "type": "loss", "content": 0.00015083990001585335, "timestamp": "2025-09-10 02:27:47.475170", "step": 5618, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:27:47.515880", "step": 5618, "epoch": 3 }, { "type": "loss", "content": 0.00014641489542555064, "timestamp": "2025-09-10 02:27:47.525490", "step": 5619, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:27:47.558296", "step": 5619, "epoch": 3 }, { "type": "loss", "content": 0.002689136890694499, "timestamp": "2025-09-10 02:27:47.583300", "step": 5620, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 496 ], "flops": 14712978242368 }, "timestamp": "2025-09-10 02:27:47.632703", "step": 5620, "epoch": 3 }, { "type": "loss", "content": 0.00016491406131535769, "timestamp": "2025-09-10 02:27:47.649985", "step": 5621, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:27:47.686849", "step": 5621, "epoch": 3 }, { "type": "loss", "content": 0.00010262165596941486, "timestamp": "2025-09-10 02:27:47.700613", "step": 5622, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:27:47.730764", "step": 5622, "epoch": 3 }, { "type": "loss", "content": 0.0035222251899540424, "timestamp": "2025-09-10 02:27:47.734893", "step": 5623, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:27:47.768301", "step": 5623, "epoch": 3 }, { "type": "loss", "content": 0.00014401556109078228, "timestamp": "2025-09-10 02:27:47.796183", "step": 5624, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:27:47.829146", "step": 5624, "epoch": 3 }, { "type": "loss", "content": 9.734489867696539e-05, "timestamp": "2025-09-10 02:27:47.834206", "step": 5625, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:27:47.868850", "step": 5625, "epoch": 3 }, { "type": "loss", "content": 0.0001005322701530531, "timestamp": "2025-09-10 02:27:47.875667", "step": 5626, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:27:47.908700", "step": 5626, "epoch": 3 }, { "type": "loss", "content": 0.00047445675591006875, "timestamp": "2025-09-10 02:27:47.912771", "step": 5627, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:27:47.946188", "step": 5627, "epoch": 3 }, { "type": "loss", "content": 0.01834898255765438, "timestamp": "2025-09-10 02:27:47.974549", "step": 5628, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:27:48.006496", "step": 5628, "epoch": 3 }, { "type": "loss", "content": 0.011012358590960503, "timestamp": "2025-09-10 02:27:48.011573", "step": 5629, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:27:48.045571", "step": 5629, "epoch": 3 }, { "type": "loss", "content": 5.657299334416166e-05, "timestamp": "2025-09-10 02:27:48.049587", "step": 5630, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:27:48.081291", "step": 5630, "epoch": 3 }, { "type": "loss", "content": 7.444791117450222e-05, "timestamp": "2025-09-10 02:27:48.085451", "step": 5631, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:27:48.116422", "step": 5631, "epoch": 3 }, { "type": "loss", "content": 0.0001294314133701846, "timestamp": "2025-09-10 02:27:48.144885", "step": 5632, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:27:48.175511", "step": 5632, "epoch": 3 }, { "type": "loss", "content": 0.0005845736595802009, "timestamp": "2025-09-10 02:27:48.181018", "step": 5633, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:27:48.212713", "step": 5633, "epoch": 3 }, { "type": "loss", "content": 0.0001259546697838232, "timestamp": "2025-09-10 02:27:48.219610", "step": 5634, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:27:48.250841", "step": 5634, "epoch": 3 }, { "type": "loss", "content": 8.173291280400008e-05, "timestamp": "2025-09-10 02:27:48.257649", "step": 5635, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:27:48.289711", "step": 5635, "epoch": 3 }, { "type": "loss", "content": 9.338084782939404e-05, "timestamp": "2025-09-10 02:27:48.323187", "step": 5636, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:27:48.354729", "step": 5636, "epoch": 3 }, { "type": "loss", "content": 0.014086895622313023, "timestamp": "2025-09-10 02:27:48.363501", "step": 5637, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:27:48.394692", "step": 5637, "epoch": 3 }, { "type": "loss", "content": 0.0006178818293847144, "timestamp": "2025-09-10 02:27:48.399112", "step": 5638, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:27:48.430466", "step": 5638, "epoch": 3 }, { "type": "loss", "content": 0.003365145530551672, "timestamp": "2025-09-10 02:27:48.440750", "step": 5639, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-10 02:27:48.475898", "step": 5639, "epoch": 3 }, { "type": "loss", "content": 0.00015990216343197972, "timestamp": "2025-09-10 02:27:48.510610", "step": 5640, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:27:48.541734", "step": 5640, "epoch": 3 }, { "type": "loss", "content": 0.0005560553981922567, "timestamp": "2025-09-10 02:27:48.546755", "step": 5641, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:27:48.578165", "step": 5641, "epoch": 3 }, { "type": "loss", "content": 0.0001505583932157606, "timestamp": "2025-09-10 02:27:48.585776", "step": 5642, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:27:48.616419", "step": 5642, "epoch": 3 }, { "type": "loss", "content": 0.00036215136060491204, "timestamp": "2025-09-10 02:27:48.626544", "step": 5643, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:27:48.658433", "step": 5643, "epoch": 3 }, { "type": "loss", "content": 0.00019817725114990026, "timestamp": "2025-09-10 02:27:48.683629", "step": 5644, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:27:48.714705", "step": 5644, "epoch": 3 }, { "type": "loss", "content": 0.0002911986375693232, "timestamp": "2025-09-10 02:27:48.720004", "step": 5645, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:27:48.751853", "step": 5645, "epoch": 3 }, { "type": "loss", "content": 0.0006733777699992061, "timestamp": "2025-09-10 02:27:48.762610", "step": 5646, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:27:48.793476", "step": 5646, "epoch": 3 }, { "type": "loss", "content": 0.00010006874072132632, "timestamp": "2025-09-10 02:27:48.797969", "step": 5647, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:27:48.829166", "step": 5647, "epoch": 3 }, { "type": "loss", "content": 0.00048814056208357215, "timestamp": "2025-09-10 02:27:48.862325", "step": 5648, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:27:48.894618", "step": 5648, "epoch": 3 }, { "type": "loss", "content": 0.001697771018370986, "timestamp": "2025-09-10 02:27:48.899692", "step": 5649, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:27:48.930035", "step": 5649, "epoch": 3 }, { "type": "loss", "content": 0.0001428636023774743, "timestamp": "2025-09-10 02:27:48.934199", "step": 5650, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:27:48.965831", "step": 5650, "epoch": 3 }, { "type": "loss", "content": 0.000209279969567433, "timestamp": "2025-09-10 02:27:48.976845", "step": 5651, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:27:49.007514", "step": 5651, "epoch": 3 }, { "type": "loss", "content": 0.00015839662228245288, "timestamp": "2025-09-10 02:27:49.032626", "step": 5652, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:27:49.063881", "step": 5652, "epoch": 3 }, { "type": "loss", "content": 9.35560601647012e-05, "timestamp": "2025-09-10 02:27:49.068742", "step": 5653, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:27:49.099818", "step": 5653, "epoch": 3 }, { "type": "loss", "content": 0.0006064533954486251, "timestamp": "2025-09-10 02:27:49.107264", "step": 5654, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-10 02:27:49.145723", "step": 5654, "epoch": 3 }, { "type": "loss", "content": 0.0011858725920319557, "timestamp": "2025-09-10 02:27:49.161628", "step": 5655, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:27:49.193563", "step": 5655, "epoch": 3 }, { "type": "loss", "content": 0.00020078910165466368, "timestamp": "2025-09-10 02:27:49.222016", "step": 5656, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:27:49.258814", "step": 5656, "epoch": 3 }, { "type": "loss", "content": 0.0003024769830517471, "timestamp": "2025-09-10 02:27:49.264152", "step": 5657, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:27:49.295019", "step": 5657, "epoch": 3 }, { "type": "loss", "content": 0.00010363813635194674, "timestamp": "2025-09-10 02:27:49.299630", "step": 5658, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:27:49.330561", "step": 5658, "epoch": 3 }, { "type": "loss", "content": 0.000979832955636084, "timestamp": "2025-09-10 02:27:49.337473", "step": 5659, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:27:49.368731", "step": 5659, "epoch": 3 }, { "type": "loss", "content": 0.00316768535412848, "timestamp": "2025-09-10 02:27:49.397153", "step": 5660, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:27:49.428455", "step": 5660, "epoch": 3 }, { "type": "loss", "content": 0.000212435275898315, "timestamp": "2025-09-10 02:27:49.433700", "step": 5661, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:27:49.464662", "step": 5661, "epoch": 3 }, { "type": "loss", "content": 0.0002012075565289706, "timestamp": "2025-09-10 02:27:49.471690", "step": 5662, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:27:49.503263", "step": 5662, "epoch": 3 }, { "type": "loss", "content": 0.00011027476284652948, "timestamp": "2025-09-10 02:27:49.510978", "step": 5663, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:27:49.542521", "step": 5663, "epoch": 3 }, { "type": "loss", "content": 0.0003467754868324846, "timestamp": "2025-09-10 02:27:49.570507", "step": 5664, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:27:49.603015", "step": 5664, "epoch": 3 }, { "type": "loss", "content": 0.0003236646589357406, "timestamp": "2025-09-10 02:27:49.608415", "step": 5665, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:27:49.640871", "step": 5665, "epoch": 3 }, { "type": "loss", "content": 0.00015571604308206588, "timestamp": "2025-09-10 02:27:49.651335", "step": 5666, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:27:49.683553", "step": 5666, "epoch": 3 }, { "type": "loss", "content": 8.091597555903718e-05, "timestamp": "2025-09-10 02:27:49.691003", "step": 5667, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:27:49.723344", "step": 5667, "epoch": 3 }, { "type": "loss", "content": 0.007872511632740498, "timestamp": "2025-09-10 02:27:49.754583", "step": 5668, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:27:49.786086", "step": 5668, "epoch": 3 }, { "type": "loss", "content": 0.0007030696724541485, "timestamp": "2025-09-10 02:27:49.798738", "step": 5669, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:27:49.831188", "step": 5669, "epoch": 3 }, { "type": "loss", "content": 0.0006685466505587101, "timestamp": "2025-09-10 02:27:49.841817", "step": 5670, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:27:49.874810", "step": 5670, "epoch": 3 }, { "type": "loss", "content": 0.04719764366745949, "timestamp": "2025-09-10 02:27:49.879227", "step": 5671, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:27:49.909984", "step": 5671, "epoch": 3 }, { "type": "loss", "content": 0.0005829405854456127, "timestamp": "2025-09-10 02:27:49.935147", "step": 5672, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:27:49.966937", "step": 5672, "epoch": 3 }, { "type": "loss", "content": 0.00043701488175429404, "timestamp": "2025-09-10 02:27:49.969613", "step": 5673, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:27:50.000908", "step": 5673, "epoch": 3 }, { "type": "loss", "content": 0.00024339115770999342, "timestamp": "2025-09-10 02:27:50.005518", "step": 5674, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:27:50.036102", "step": 5674, "epoch": 3 }, { "type": "loss", "content": 0.00014959640975575894, "timestamp": "2025-09-10 02:27:50.043834", "step": 5675, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-10 02:27:50.080322", "step": 5675, "epoch": 3 }, { "type": "loss", "content": 6.585565279237926e-05, "timestamp": "2025-09-10 02:27:50.115216", "step": 5676, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:27:50.146329", "step": 5676, "epoch": 3 }, { "type": "loss", "content": 0.05050432309508324, "timestamp": "2025-09-10 02:27:50.156092", "step": 5677, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-10 02:27:50.190885", "step": 5677, "epoch": 3 }, { "type": "loss", "content": 0.00037958953180350363, "timestamp": "2025-09-10 02:27:50.204725", "step": 5678, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:27:50.235976", "step": 5678, "epoch": 3 }, { "type": "loss", "content": 0.00040227436693385243, "timestamp": "2025-09-10 02:27:50.242748", "step": 5679, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:27:50.273801", "step": 5679, "epoch": 3 }, { "type": "loss", "content": 0.00012008142948616296, "timestamp": "2025-09-10 02:27:50.298810", "step": 5680, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:27:50.329195", "step": 5680, "epoch": 3 }, { "type": "loss", "content": 9.119750757236034e-05, "timestamp": "2025-09-10 02:27:50.333798", "step": 5681, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:27:50.365461", "step": 5681, "epoch": 3 }, { "type": "loss", "content": 0.0011381086660549045, "timestamp": "2025-09-10 02:27:50.378048", "step": 5682, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 576 ], "flops": 17085996872448 }, "timestamp": "2025-09-10 02:27:50.427147", "step": 5682, "epoch": 3 }, { "type": "loss", "content": 0.0008125408785417676, "timestamp": "2025-09-10 02:27:50.446582", "step": 5683, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:27:50.478234", "step": 5683, "epoch": 3 }, { "type": "loss", "content": 0.00010114780889125541, "timestamp": "2025-09-10 02:27:50.509371", "step": 5684, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:27:50.540848", "step": 5684, "epoch": 3 }, { "type": "loss", "content": 0.0005297398311085999, "timestamp": "2025-09-10 02:27:50.543033", "step": 5685, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:27:50.574441", "step": 5685, "epoch": 3 }, { "type": "loss", "content": 0.0005330296116881073, "timestamp": "2025-09-10 02:27:50.582221", "step": 5686, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:27:50.616873", "step": 5686, "epoch": 3 }, { "type": "loss", "content": 0.0010487495455890894, "timestamp": "2025-09-10 02:27:50.630586", "step": 5687, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 144 ], "flops": 4271696270016 }, "timestamp": "2025-09-10 02:27:50.661249", "step": 5687, "epoch": 3 }, { "type": "loss", "content": 0.00015214362065307796, "timestamp": "2025-09-10 02:27:50.685189", "step": 5688, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:27:50.717537", "step": 5688, "epoch": 3 }, { "type": "loss", "content": 0.010265544056892395, "timestamp": "2025-09-10 02:27:50.719956", "step": 5689, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:27:50.750598", "step": 5689, "epoch": 3 }, { "type": "loss", "content": 0.0020830826833844185, "timestamp": "2025-09-10 02:27:50.757664", "step": 5690, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:27:50.789024", "step": 5690, "epoch": 3 }, { "type": "loss", "content": 0.0004475609748624265, "timestamp": "2025-09-10 02:27:50.793326", "step": 5691, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 144 ], "flops": 4271696270016 }, "timestamp": "2025-09-10 02:27:50.824013", "step": 5691, "epoch": 3 }, { "type": "loss", "content": 0.0002475904766470194, "timestamp": "2025-09-10 02:27:50.847535", "step": 5692, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:27:50.879538", "step": 5692, "epoch": 3 }, { "type": "loss", "content": 0.0027441454585641623, "timestamp": "2025-09-10 02:27:50.881916", "step": 5693, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:27:50.916408", "step": 5693, "epoch": 3 }, { "type": "loss", "content": 0.0001911252038553357, "timestamp": "2025-09-10 02:27:50.926786", "step": 5694, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:27:50.958900", "step": 5694, "epoch": 3 }, { "type": "loss", "content": 5.5323111155303195e-05, "timestamp": "2025-09-10 02:27:50.966470", "step": 5695, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:27:50.999357", "step": 5695, "epoch": 3 }, { "type": "loss", "content": 0.0004969104775227606, "timestamp": "2025-09-10 02:27:51.031237", "step": 5696, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:27:51.063553", "step": 5696, "epoch": 3 }, { "type": "loss", "content": 0.03493015095591545, "timestamp": "2025-09-10 02:27:51.065993", "step": 5697, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:27:51.097386", "step": 5697, "epoch": 3 }, { "type": "loss", "content": 8.232889376813546e-05, "timestamp": "2025-09-10 02:27:51.104580", "step": 5698, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 144 ], "flops": 4271696270016 }, "timestamp": "2025-09-10 02:27:51.135783", "step": 5698, "epoch": 3 }, { "type": "loss", "content": 0.0008442237740382552, "timestamp": "2025-09-10 02:27:51.139307", "step": 5699, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:27:51.170109", "step": 5699, "epoch": 3 }, { "type": "loss", "content": 0.006859573069959879, "timestamp": "2025-09-10 02:27:51.194266", "step": 5700, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:27:51.227350", "step": 5700, "epoch": 3 }, { "type": "loss", "content": 0.0002599300933070481, "timestamp": "2025-09-10 02:27:51.232459", "step": 5701, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:27:51.266118", "step": 5701, "epoch": 3 }, { "type": "loss", "content": 0.004736356902867556, "timestamp": "2025-09-10 02:27:51.279485", "step": 5702, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:27:51.311764", "step": 5702, "epoch": 3 }, { "type": "loss", "content": 0.0007690637721680105, "timestamp": "2025-09-10 02:27:51.316233", "step": 5703, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:27:51.347562", "step": 5703, "epoch": 3 }, { "type": "loss", "content": 0.0204758383333683, "timestamp": "2025-09-10 02:27:51.380594", "step": 5704, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:27:51.412550", "step": 5704, "epoch": 3 }, { "type": "loss", "content": 0.0011761164059862494, "timestamp": "2025-09-10 02:27:51.416928", "step": 5705, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:27:51.448539", "step": 5705, "epoch": 3 }, { "type": "loss", "content": 5.793437594547868e-05, "timestamp": "2025-09-10 02:27:51.455944", "step": 5706, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:27:51.487078", "step": 5706, "epoch": 3 }, { "type": "loss", "content": 8.091299969237298e-05, "timestamp": "2025-09-10 02:27:51.493963", "step": 5707, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:27:51.526583", "step": 5707, "epoch": 3 }, { "type": "loss", "content": 0.03535769507288933, "timestamp": "2025-09-10 02:27:51.551702", "step": 5708, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:27:51.583111", "step": 5708, "epoch": 3 }, { "type": "loss", "content": 0.0004072172741871327, "timestamp": "2025-09-10 02:27:51.588498", "step": 5709, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:27:51.619214", "step": 5709, "epoch": 3 }, { "type": "loss", "content": 0.0003087377699557692, "timestamp": "2025-09-10 02:27:51.627130", "step": 5710, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:27:51.658723", "step": 5710, "epoch": 3 }, { "type": "loss", "content": 0.006235843989998102, "timestamp": "2025-09-10 02:27:51.670864", "step": 5711, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:27:51.702195", "step": 5711, "epoch": 3 }, { "type": "loss", "content": 7.547135464847088e-05, "timestamp": "2025-09-10 02:27:51.730599", "step": 5712, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:27:51.762336", "step": 5712, "epoch": 3 }, { "type": "loss", "content": 0.0005502524436451495, "timestamp": "2025-09-10 02:27:51.767362", "step": 5713, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:27:51.798998", "step": 5713, "epoch": 3 }, { "type": "loss", "content": 0.0001679424021858722, "timestamp": "2025-09-10 02:27:51.811368", "step": 5714, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:27:51.842756", "step": 5714, "epoch": 3 }, { "type": "loss", "content": 0.0006052498356439173, "timestamp": "2025-09-10 02:27:51.845260", "step": 5715, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:27:51.877035", "step": 5715, "epoch": 3 }, { "type": "loss", "content": 0.0018796491203829646, "timestamp": "2025-09-10 02:27:51.910146", "step": 5716, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:27:51.940836", "step": 5716, "epoch": 3 }, { "type": "loss", "content": 0.015208103694021702, "timestamp": "2025-09-10 02:27:51.943246", "step": 5717, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:27:51.974973", "step": 5717, "epoch": 3 }, { "type": "loss", "content": 0.0038770330138504505, "timestamp": "2025-09-10 02:27:51.985251", "step": 5718, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:27:52.017433", "step": 5718, "epoch": 3 }, { "type": "loss", "content": 0.0001229040208272636, "timestamp": "2025-09-10 02:27:52.027604", "step": 5719, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:27:52.058545", "step": 5719, "epoch": 3 }, { "type": "loss", "content": 0.0011433316394686699, "timestamp": "2025-09-10 02:27:52.086539", "step": 5720, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:27:52.117962", "step": 5720, "epoch": 3 }, { "type": "loss", "content": 0.0003462762397248298, "timestamp": "2025-09-10 02:27:52.120244", "step": 5721, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:27:52.152796", "step": 5721, "epoch": 3 }, { "type": "loss", "content": 0.000564678106456995, "timestamp": "2025-09-10 02:27:52.159875", "step": 5722, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:27:52.192123", "step": 5722, "epoch": 3 }, { "type": "loss", "content": 0.0010591919999569654, "timestamp": "2025-09-10 02:27:52.199658", "step": 5723, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:27:52.230817", "step": 5723, "epoch": 3 }, { "type": "loss", "content": 0.01734619028866291, "timestamp": "2025-09-10 02:27:52.258566", "step": 5724, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:27:52.289646", "step": 5724, "epoch": 3 }, { "type": "loss", "content": 0.0001629464386496693, "timestamp": "2025-09-10 02:27:52.297564", "step": 5725, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:27:52.330081", "step": 5725, "epoch": 3 }, { "type": "loss", "content": 3.870922591886483e-05, "timestamp": "2025-09-10 02:27:52.333926", "step": 5726, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:27:52.365369", "step": 5726, "epoch": 3 }, { "type": "loss", "content": 0.018190907314419746, "timestamp": "2025-09-10 02:27:52.369832", "step": 5727, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:27:52.400840", "step": 5727, "epoch": 3 }, { "type": "loss", "content": 2.9860497306799516e-05, "timestamp": "2025-09-10 02:27:52.426196", "step": 5728, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:27:52.456878", "step": 5728, "epoch": 3 }, { "type": "loss", "content": 0.0004968225257471204, "timestamp": "2025-09-10 02:27:52.465537", "step": 5729, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:27:52.496269", "step": 5729, "epoch": 3 }, { "type": "loss", "content": 0.01434040255844593, "timestamp": "2025-09-10 02:27:52.504074", "step": 5730, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:27:52.537346", "step": 5730, "epoch": 3 }, { "type": "loss", "content": 0.005067458841949701, "timestamp": "2025-09-10 02:27:52.544273", "step": 5731, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:27:52.576225", "step": 5731, "epoch": 3 }, { "type": "loss", "content": 9.439605491934344e-05, "timestamp": "2025-09-10 02:27:52.608122", "step": 5732, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-10 02:27:52.645892", "step": 5732, "epoch": 3 }, { "type": "loss", "content": 0.003644505748525262, "timestamp": "2025-09-10 02:27:52.661313", "step": 5733, "epoch": 3 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 736 ], "batch_size": 8, "flops": 14554433988352 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 480 ], "batch_size": 8, "flops": 9492022189824 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 544 ], "batch_size": 8, "flops": 10757625139456 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 624 ], "batch_size": 8, "flops": 12339628826496 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 576 ], "batch_size": 8, "flops": 11390426614272 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 432 ], "batch_size": 8, "flops": 8542819977600 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 432 ], "batch_size": 8, "flops": 8542819977600 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 128 ], "batch_size": 8, "flops": 2531205966848 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 544 ], "batch_size": 8, "flops": 10757625139456 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 656 ], "batch_size": 8, "flops": 12972430301312 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 128 ], "batch_size": 8, "flops": 2531205966848 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 464 ], "batch_size": 8, "flops": 9175621452416 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 560 ], "batch_size": 8, "flops": 11074025876864 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 576 ], "batch_size": 8, "flops": 11390426614272 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 1168 ], "batch_size": 8, "flops": 23097253898368 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 640 ], "batch_size": 8, "flops": 12656029563904 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 6, 208 ], "batch_size": 8, "flops": 4113209653888 } ], "timestamp": "2025-09-10 02:28:02.807246", "step": 5733, "epoch": 3 }, { "type": "pplx", "content": 22101991.669623252, "timestamp": "2025-09-10 02:28:02.810174", "step": 5733, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:28:02.840101", "step": 5733, "epoch": 3 }, { "type": "loss", "content": 0.033737100660800934, "timestamp": "2025-09-10 02:28:02.842373", "step": 5734, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 144 ], "flops": 4271696270016 }, "timestamp": "2025-09-10 02:28:02.876069", "step": 5734, "epoch": 3 }, { "type": "loss", "content": 0.00036468225880526006, "timestamp": "2025-09-10 02:28:02.878737", "step": 5735, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:28:02.910075", "step": 5735, "epoch": 3 }, { "type": "loss", "content": 0.009591284207999706, "timestamp": "2025-09-10 02:28:02.937888", "step": 5736, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:28:02.969710", "step": 5736, "epoch": 3 }, { "type": "loss", "content": 0.0036816957872360945, "timestamp": "2025-09-10 02:28:02.980055", "step": 5737, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-10 02:28:03.016287", "step": 5737, "epoch": 3 }, { "type": "loss", "content": 0.0011977337999269366, "timestamp": "2025-09-10 02:28:03.030182", "step": 5738, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-10 02:28:03.069816", "step": 5738, "epoch": 3 }, { "type": "loss", "content": 0.0003067262005060911, "timestamp": "2025-09-10 02:28:03.085708", "step": 5739, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:28:03.121082", "step": 5739, "epoch": 3 }, { "type": "loss", "content": 0.026271553710103035, "timestamp": "2025-09-10 02:28:03.155396", "step": 5740, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:28:03.189088", "step": 5740, "epoch": 3 }, { "type": "loss", "content": 0.0015502488240599632, "timestamp": "2025-09-10 02:28:03.198824", "step": 5741, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-10 02:28:03.235146", "step": 5741, "epoch": 3 }, { "type": "loss", "content": 0.00022286844614427537, "timestamp": "2025-09-10 02:28:03.249150", "step": 5742, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:28:03.283744", "step": 5742, "epoch": 3 }, { "type": "loss", "content": 0.0005459203966893256, "timestamp": "2025-09-10 02:28:03.294604", "step": 5743, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:28:03.329955", "step": 5743, "epoch": 3 }, { "type": "loss", "content": 0.0006617820472456515, "timestamp": "2025-09-10 02:28:03.360954", "step": 5744, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:28:03.393855", "step": 5744, "epoch": 3 }, { "type": "loss", "content": 0.00014259156887419522, "timestamp": "2025-09-10 02:28:03.396424", "step": 5745, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:28:03.434320", "step": 5745, "epoch": 3 }, { "type": "loss", "content": 0.004075972363352776, "timestamp": "2025-09-10 02:28:03.437045", "step": 5746, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:28:03.469204", "step": 5746, "epoch": 3 }, { "type": "loss", "content": 0.0010417302837595344, "timestamp": "2025-09-10 02:28:03.476114", "step": 5747, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:28:03.507167", "step": 5747, "epoch": 3 }, { "type": "loss", "content": 0.0031860729213804007, "timestamp": "2025-09-10 02:28:03.531694", "step": 5748, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:28:03.562989", "step": 5748, "epoch": 3 }, { "type": "loss", "content": 6.74440452712588e-05, "timestamp": "2025-09-10 02:28:03.568360", "step": 5749, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:28:03.599347", "step": 5749, "epoch": 3 }, { "type": "loss", "content": 0.0004242011927999556, "timestamp": "2025-09-10 02:28:03.606428", "step": 5750, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:28:03.638907", "step": 5750, "epoch": 3 }, { "type": "loss", "content": 0.002124165650457144, "timestamp": "2025-09-10 02:28:03.651497", "step": 5751, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:28:03.685467", "step": 5751, "epoch": 3 }, { "type": "loss", "content": 0.0002149190113414079, "timestamp": "2025-09-10 02:28:03.710926", "step": 5752, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:28:03.743262", "step": 5752, "epoch": 3 }, { "type": "loss", "content": 0.003125338116660714, "timestamp": "2025-09-10 02:28:03.745774", "step": 5753, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:28:03.780095", "step": 5753, "epoch": 3 }, { "type": "loss", "content": 0.010561124421656132, "timestamp": "2025-09-10 02:28:03.791977", "step": 5754, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-10 02:28:03.832603", "step": 5754, "epoch": 3 }, { "type": "loss", "content": 0.0001346966892015189, "timestamp": "2025-09-10 02:28:03.846651", "step": 5755, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:28:03.878073", "step": 5755, "epoch": 3 }, { "type": "loss", "content": 0.0008288529934361577, "timestamp": "2025-09-10 02:28:03.909071", "step": 5756, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:28:03.939746", "step": 5756, "epoch": 3 }, { "type": "loss", "content": 0.022131487727165222, "timestamp": "2025-09-10 02:28:03.944860", "step": 5757, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:28:03.977124", "step": 5757, "epoch": 3 }, { "type": "loss", "content": 0.00015002823784016073, "timestamp": "2025-09-10 02:28:03.984507", "step": 5758, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:28:04.020408", "step": 5758, "epoch": 3 }, { "type": "loss", "content": 0.0003043616015929729, "timestamp": "2025-09-10 02:28:04.032525", "step": 5759, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 432 ], "flops": 12814563338304 }, "timestamp": "2025-09-10 02:28:04.072446", "step": 5759, "epoch": 3 }, { "type": "loss", "content": 5.715518636861816e-05, "timestamp": "2025-09-10 02:28:04.109495", "step": 5760, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:28:04.141618", "step": 5760, "epoch": 3 }, { "type": "loss", "content": 0.00012244329263921827, "timestamp": "2025-09-10 02:28:04.146827", "step": 5761, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:28:04.178768", "step": 5761, "epoch": 3 }, { "type": "loss", "content": 0.004466865211725235, "timestamp": "2025-09-10 02:28:04.185715", "step": 5762, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:28:04.217142", "step": 5762, "epoch": 3 }, { "type": "loss", "content": 0.0006833565421402454, "timestamp": "2025-09-10 02:28:04.229486", "step": 5763, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:28:04.260629", "step": 5763, "epoch": 3 }, { "type": "loss", "content": 0.0013124813558533788, "timestamp": "2025-09-10 02:28:04.289011", "step": 5764, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:28:04.321369", "step": 5764, "epoch": 3 }, { "type": "loss", "content": 0.0013321618316695094, "timestamp": "2025-09-10 02:28:04.328935", "step": 5765, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:28:04.363050", "step": 5765, "epoch": 3 }, { "type": "loss", "content": 8.689150854479522e-05, "timestamp": "2025-09-10 02:28:04.376445", "step": 5766, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:28:04.408901", "step": 5766, "epoch": 3 }, { "type": "loss", "content": 0.0008448630687780678, "timestamp": "2025-09-10 02:28:04.412863", "step": 5767, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:28:04.447011", "step": 5767, "epoch": 3 }, { "type": "loss", "content": 7.289824134204537e-05, "timestamp": "2025-09-10 02:28:04.481249", "step": 5768, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:28:04.513451", "step": 5768, "epoch": 3 }, { "type": "loss", "content": 0.0007902790675871074, "timestamp": "2025-09-10 02:28:04.521242", "step": 5769, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:28:04.552772", "step": 5769, "epoch": 3 }, { "type": "loss", "content": 0.018035726621747017, "timestamp": "2025-09-10 02:28:04.560464", "step": 5770, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:28:04.594612", "step": 5770, "epoch": 3 }, { "type": "loss", "content": 0.0003475056146271527, "timestamp": "2025-09-10 02:28:04.602399", "step": 5771, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:28:04.634735", "step": 5771, "epoch": 3 }, { "type": "loss", "content": 0.001296757603995502, "timestamp": "2025-09-10 02:28:04.663414", "step": 5772, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:28:04.695104", "step": 5772, "epoch": 3 }, { "type": "loss", "content": 0.0012029794743284583, "timestamp": "2025-09-10 02:28:04.700693", "step": 5773, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:28:04.731906", "step": 5773, "epoch": 3 }, { "type": "loss", "content": 0.002924522617831826, "timestamp": "2025-09-10 02:28:04.739804", "step": 5774, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:28:04.771603", "step": 5774, "epoch": 3 }, { "type": "loss", "content": 0.00025918486062437296, "timestamp": "2025-09-10 02:28:04.776217", "step": 5775, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:28:04.811165", "step": 5775, "epoch": 3 }, { "type": "loss", "content": 0.00013173665502108634, "timestamp": "2025-09-10 02:28:04.845733", "step": 5776, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:28:04.877086", "step": 5776, "epoch": 3 }, { "type": "loss", "content": 0.01582178846001625, "timestamp": "2025-09-10 02:28:04.884742", "step": 5777, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 480 ], "flops": 14238374516352 }, "timestamp": "2025-09-10 02:28:04.927710", "step": 5777, "epoch": 3 }, { "type": "loss", "content": 0.0009278925135731697, "timestamp": "2025-09-10 02:28:04.945062", "step": 5778, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:28:04.977713", "step": 5778, "epoch": 3 }, { "type": "loss", "content": 0.0010470326524227858, "timestamp": "2025-09-10 02:28:04.984773", "step": 5779, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:28:05.016401", "step": 5779, "epoch": 3 }, { "type": "loss", "content": 0.0019801973830908537, "timestamp": "2025-09-10 02:28:05.043954", "step": 5780, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:28:05.074622", "step": 5780, "epoch": 3 }, { "type": "loss", "content": 0.0018586774822324514, "timestamp": "2025-09-10 02:28:05.079203", "step": 5781, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:28:05.109637", "step": 5781, "epoch": 3 }, { "type": "loss", "content": 0.00046877076965756714, "timestamp": "2025-09-10 02:28:05.119815", "step": 5782, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:28:05.150944", "step": 5782, "epoch": 3 }, { "type": "loss", "content": 0.00047852486022748053, "timestamp": "2025-09-10 02:28:05.158370", "step": 5783, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:28:05.190177", "step": 5783, "epoch": 3 }, { "type": "loss", "content": 0.00019953006994910538, "timestamp": "2025-09-10 02:28:05.218090", "step": 5784, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:28:05.248152", "step": 5784, "epoch": 3 }, { "type": "loss", "content": 0.028778070583939552, "timestamp": "2025-09-10 02:28:05.256231", "step": 5785, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:28:05.287974", "step": 5785, "epoch": 3 }, { "type": "loss", "content": 0.00035870965803042054, "timestamp": "2025-09-10 02:28:05.295459", "step": 5786, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:28:05.327021", "step": 5786, "epoch": 3 }, { "type": "loss", "content": 0.00494037102907896, "timestamp": "2025-09-10 02:28:05.334576", "step": 5787, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:28:05.366972", "step": 5787, "epoch": 3 }, { "type": "loss", "content": 0.0017796893371269107, "timestamp": "2025-09-10 02:28:05.394417", "step": 5788, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:28:05.425926", "step": 5788, "epoch": 3 }, { "type": "loss", "content": 0.003134679514914751, "timestamp": "2025-09-10 02:28:05.430593", "step": 5789, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:28:05.461991", "step": 5789, "epoch": 3 }, { "type": "loss", "content": 0.0004141936369705945, "timestamp": "2025-09-10 02:28:05.466566", "step": 5790, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:28:05.498350", "step": 5790, "epoch": 3 }, { "type": "loss", "content": 0.000323984568240121, "timestamp": "2025-09-10 02:28:05.501044", "step": 5791, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:28:05.532998", "step": 5791, "epoch": 3 }, { "type": "loss", "content": 0.0023834407329559326, "timestamp": "2025-09-10 02:28:05.560963", "step": 5792, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:28:05.607438", "step": 5792, "epoch": 3 }, { "type": "loss", "content": 0.00019796183914877474, "timestamp": "2025-09-10 02:28:05.612967", "step": 5793, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:28:05.643451", "step": 5793, "epoch": 3 }, { "type": "loss", "content": 0.002645928878337145, "timestamp": "2025-09-10 02:28:05.651231", "step": 5794, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:28:05.681908", "step": 5794, "epoch": 3 }, { "type": "loss", "content": 0.0006678312201984227, "timestamp": "2025-09-10 02:28:05.688973", "step": 5795, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-10 02:28:05.732261", "step": 5795, "epoch": 3 }, { "type": "loss", "content": 0.0002754285524133593, "timestamp": "2025-09-10 02:28:05.766953", "step": 5796, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:28:05.798145", "step": 5796, "epoch": 3 }, { "type": "loss", "content": 0.0007207631133496761, "timestamp": "2025-09-10 02:28:05.803576", "step": 5797, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:28:05.835472", "step": 5797, "epoch": 3 }, { "type": "loss", "content": 0.0006030822987668216, "timestamp": "2025-09-10 02:28:05.842377", "step": 5798, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:28:05.873976", "step": 5798, "epoch": 3 }, { "type": "loss", "content": 0.00043905325583182275, "timestamp": "2025-09-10 02:28:05.884375", "step": 5799, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:28:05.919505", "step": 5799, "epoch": 3 }, { "type": "loss", "content": 0.00043995765736326575, "timestamp": "2025-09-10 02:28:05.947055", "step": 5800, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:28:05.979533", "step": 5800, "epoch": 3 }, { "type": "loss", "content": 0.0005144139868207276, "timestamp": "2025-09-10 02:28:05.987903", "step": 5801, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:28:06.019258", "step": 5801, "epoch": 3 }, { "type": "loss", "content": 0.0005805970868095756, "timestamp": "2025-09-10 02:28:06.029217", "step": 5802, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:28:06.061900", "step": 5802, "epoch": 3 }, { "type": "loss", "content": 0.004789031110703945, "timestamp": "2025-09-10 02:28:06.069365", "step": 5803, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:28:06.100953", "step": 5803, "epoch": 3 }, { "type": "loss", "content": 0.00047223473666235805, "timestamp": "2025-09-10 02:28:06.132849", "step": 5804, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:28:06.165408", "step": 5804, "epoch": 3 }, { "type": "loss", "content": 0.000832175777759403, "timestamp": "2025-09-10 02:28:06.170104", "step": 5805, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:28:06.201542", "step": 5805, "epoch": 3 }, { "type": "loss", "content": 0.00040307757444679737, "timestamp": "2025-09-10 02:28:06.212414", "step": 5806, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:28:06.243676", "step": 5806, "epoch": 3 }, { "type": "loss", "content": 0.0005037328810431063, "timestamp": "2025-09-10 02:28:06.256030", "step": 5807, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:28:06.287141", "step": 5807, "epoch": 3 }, { "type": "loss", "content": 0.0005597766139544547, "timestamp": "2025-09-10 02:28:06.314819", "step": 5808, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:28:06.346542", "step": 5808, "epoch": 3 }, { "type": "loss", "content": 0.008857275359332561, "timestamp": "2025-09-10 02:28:06.354152", "step": 5809, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:28:06.387344", "step": 5809, "epoch": 3 }, { "type": "loss", "content": 0.0009536809520795941, "timestamp": "2025-09-10 02:28:06.391070", "step": 5810, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:28:06.423975", "step": 5810, "epoch": 3 }, { "type": "loss", "content": 0.0008017036016099155, "timestamp": "2025-09-10 02:28:06.430461", "step": 5811, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:28:06.463001", "step": 5811, "epoch": 3 }, { "type": "loss", "content": 0.0008804936660453677, "timestamp": "2025-09-10 02:28:06.496465", "step": 5812, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-10 02:28:06.534358", "step": 5812, "epoch": 3 }, { "type": "loss", "content": 0.0013134771725162864, "timestamp": "2025-09-10 02:28:06.549509", "step": 5813, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:28:06.581408", "step": 5813, "epoch": 3 }, { "type": "loss", "content": 0.0001699960557743907, "timestamp": "2025-09-10 02:28:06.585313", "step": 5814, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:28:06.620356", "step": 5814, "epoch": 3 }, { "type": "loss", "content": 0.00022905482910573483, "timestamp": "2025-09-10 02:28:06.627965", "step": 5815, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:28:06.659439", "step": 5815, "epoch": 3 }, { "type": "loss", "content": 0.005473580211400986, "timestamp": "2025-09-10 02:28:06.684299", "step": 5816, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:28:06.717091", "step": 5816, "epoch": 3 }, { "type": "loss", "content": 0.00022384269686881453, "timestamp": "2025-09-10 02:28:06.729757", "step": 5817, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:28:06.761616", "step": 5817, "epoch": 3 }, { "type": "loss", "content": 0.0034808197524398565, "timestamp": "2025-09-10 02:28:06.771789", "step": 5818, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:28:06.804176", "step": 5818, "epoch": 3 }, { "type": "loss", "content": 0.0008100624545477331, "timestamp": "2025-09-10 02:28:06.808494", "step": 5819, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:28:06.840180", "step": 5819, "epoch": 3 }, { "type": "loss", "content": 0.0004766239726450294, "timestamp": "2025-09-10 02:28:06.865355", "step": 5820, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:28:06.898933", "step": 5820, "epoch": 3 }, { "type": "loss", "content": 0.00017594116798136383, "timestamp": "2025-09-10 02:28:06.911961", "step": 5821, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:28:06.948075", "step": 5821, "epoch": 3 }, { "type": "loss", "content": 8.833243191475049e-05, "timestamp": "2025-09-10 02:28:06.960014", "step": 5822, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:28:06.992918", "step": 5822, "epoch": 3 }, { "type": "loss", "content": 0.00015262920351233333, "timestamp": "2025-09-10 02:28:06.999660", "step": 5823, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:28:07.034959", "step": 5823, "epoch": 3 }, { "type": "loss", "content": 0.00041047646664083004, "timestamp": "2025-09-10 02:28:07.063560", "step": 5824, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:28:07.096517", "step": 5824, "epoch": 3 }, { "type": "loss", "content": 0.0002567381889093667, "timestamp": "2025-09-10 02:28:07.098731", "step": 5825, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:28:07.132739", "step": 5825, "epoch": 3 }, { "type": "loss", "content": 0.04794417694211006, "timestamp": "2025-09-10 02:28:07.136836", "step": 5826, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:28:07.168906", "step": 5826, "epoch": 3 }, { "type": "loss", "content": 0.0002849227748811245, "timestamp": "2025-09-10 02:28:07.175809", "step": 5827, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:28:07.206278", "step": 5827, "epoch": 3 }, { "type": "loss", "content": 0.000525909592397511, "timestamp": "2025-09-10 02:28:07.234310", "step": 5828, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:28:07.265525", "step": 5828, "epoch": 3 }, { "type": "loss", "content": 0.0005727821262553334, "timestamp": "2025-09-10 02:28:07.270526", "step": 5829, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:28:07.308371", "step": 5829, "epoch": 3 }, { "type": "loss", "content": 0.00017900993407238275, "timestamp": "2025-09-10 02:28:07.317078", "step": 5830, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:28:07.355057", "step": 5830, "epoch": 3 }, { "type": "loss", "content": 0.00022803548199590296, "timestamp": "2025-09-10 02:28:07.359320", "step": 5831, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:28:07.390801", "step": 5831, "epoch": 3 }, { "type": "loss", "content": 0.00024710877914913, "timestamp": "2025-09-10 02:28:07.416686", "step": 5832, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:28:07.447536", "step": 5832, "epoch": 3 }, { "type": "loss", "content": 0.00446285679936409, "timestamp": "2025-09-10 02:28:07.450094", "step": 5833, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:28:07.480424", "step": 5833, "epoch": 3 }, { "type": "loss", "content": 0.0004049288108944893, "timestamp": "2025-09-10 02:28:07.488080", "step": 5834, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:28:07.524052", "step": 5834, "epoch": 3 }, { "type": "loss", "content": 0.00038109347224235535, "timestamp": "2025-09-10 02:28:07.531088", "step": 5835, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:28:07.563022", "step": 5835, "epoch": 3 }, { "type": "loss", "content": 0.00032000825740396976, "timestamp": "2025-09-10 02:28:07.587896", "step": 5836, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:28:07.619266", "step": 5836, "epoch": 3 }, { "type": "loss", "content": 0.0012009877245873213, "timestamp": "2025-09-10 02:28:07.624814", "step": 5837, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:28:07.655720", "step": 5837, "epoch": 3 }, { "type": "loss", "content": 0.0006187800318002701, "timestamp": "2025-09-10 02:28:07.663362", "step": 5838, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:28:07.697095", "step": 5838, "epoch": 3 }, { "type": "loss", "content": 0.0020875423215329647, "timestamp": "2025-09-10 02:28:07.710498", "step": 5839, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:28:07.741626", "step": 5839, "epoch": 3 }, { "type": "loss", "content": 0.0002056649245787412, "timestamp": "2025-09-10 02:28:07.769542", "step": 5840, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:28:07.800908", "step": 5840, "epoch": 3 }, { "type": "loss", "content": 0.0004982929094694555, "timestamp": "2025-09-10 02:28:07.803353", "step": 5841, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:28:07.835570", "step": 5841, "epoch": 3 }, { "type": "loss", "content": 0.000268876610789448, "timestamp": "2025-09-10 02:28:07.842998", "step": 5842, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:28:07.876751", "step": 5842, "epoch": 3 }, { "type": "loss", "content": 0.0005106105236336589, "timestamp": "2025-09-10 02:28:07.890146", "step": 5843, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:28:07.924353", "step": 5843, "epoch": 3 }, { "type": "loss", "content": 7.785356137901545e-05, "timestamp": "2025-09-10 02:28:07.952639", "step": 5844, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:28:07.983801", "step": 5844, "epoch": 3 }, { "type": "loss", "content": 0.0004512739833444357, "timestamp": "2025-09-10 02:28:07.991443", "step": 5845, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:28:08.023478", "step": 5845, "epoch": 3 }, { "type": "loss", "content": 0.0006680196383967996, "timestamp": "2025-09-10 02:28:08.027553", "step": 5846, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:28:08.058692", "step": 5846, "epoch": 3 }, { "type": "loss", "content": 0.0003025097248610109, "timestamp": "2025-09-10 02:28:08.065598", "step": 5847, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:28:08.097241", "step": 5847, "epoch": 3 }, { "type": "loss", "content": 0.005206712055951357, "timestamp": "2025-09-10 02:28:08.125006", "step": 5848, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:28:08.156546", "step": 5848, "epoch": 3 }, { "type": "loss", "content": 0.008312045596539974, "timestamp": "2025-09-10 02:28:08.161170", "step": 5849, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:28:08.192371", "step": 5849, "epoch": 3 }, { "type": "loss", "content": 0.0003203331143595278, "timestamp": "2025-09-10 02:28:08.199841", "step": 5850, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-10 02:28:08.238100", "step": 5850, "epoch": 3 }, { "type": "loss", "content": 0.0007190610049292445, "timestamp": "2025-09-10 02:28:08.253892", "step": 5851, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:28:08.285276", "step": 5851, "epoch": 3 }, { "type": "loss", "content": 0.0019516788888722658, "timestamp": "2025-09-10 02:28:08.310723", "step": 5852, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:28:08.343465", "step": 5852, "epoch": 3 }, { "type": "loss", "content": 0.0022987746633589268, "timestamp": "2025-09-10 02:28:08.356560", "step": 5853, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:28:08.387174", "step": 5853, "epoch": 3 }, { "type": "loss", "content": 0.00023405192769132555, "timestamp": "2025-09-10 02:28:08.389924", "step": 5854, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:28:08.421570", "step": 5854, "epoch": 3 }, { "type": "loss", "content": 0.00021774417837150395, "timestamp": "2025-09-10 02:28:08.429305", "step": 5855, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:28:08.461401", "step": 5855, "epoch": 3 }, { "type": "loss", "content": 0.00024362494878005236, "timestamp": "2025-09-10 02:28:08.492667", "step": 5856, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:28:08.525393", "step": 5856, "epoch": 3 }, { "type": "loss", "content": 0.0006450935616157949, "timestamp": "2025-09-10 02:28:08.530034", "step": 5857, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:28:08.561832", "step": 5857, "epoch": 3 }, { "type": "loss", "content": 0.0009278419311158359, "timestamp": "2025-09-10 02:28:08.569270", "step": 5858, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:28:08.602237", "step": 5858, "epoch": 3 }, { "type": "loss", "content": 0.0007209957693703473, "timestamp": "2025-09-10 02:28:08.608970", "step": 5859, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:28:08.639784", "step": 5859, "epoch": 3 }, { "type": "loss", "content": 5.901495387661271e-05, "timestamp": "2025-09-10 02:28:08.663893", "step": 5860, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:28:08.697450", "step": 5860, "epoch": 3 }, { "type": "loss", "content": 0.000261887616943568, "timestamp": "2025-09-10 02:28:08.701869", "step": 5861, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:28:08.748593", "step": 5861, "epoch": 3 }, { "type": "loss", "content": 0.002333016600459814, "timestamp": "2025-09-10 02:28:08.753260", "step": 5862, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:28:08.785064", "step": 5862, "epoch": 3 }, { "type": "loss", "content": 0.00048499341937713325, "timestamp": "2025-09-10 02:28:08.792648", "step": 5863, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:28:08.824162", "step": 5863, "epoch": 3 }, { "type": "loss", "content": 0.0001742523891152814, "timestamp": "2025-09-10 02:28:08.852884", "step": 5864, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:28:08.884034", "step": 5864, "epoch": 3 }, { "type": "loss", "content": 0.00020447876886464655, "timestamp": "2025-09-10 02:28:08.889211", "step": 5865, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:28:08.922986", "step": 5865, "epoch": 3 }, { "type": "loss", "content": 0.0033087453339248896, "timestamp": "2025-09-10 02:28:08.935260", "step": 5866, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:28:08.965827", "step": 5866, "epoch": 3 }, { "type": "loss", "content": 0.000428111816290766, "timestamp": "2025-09-10 02:28:08.972751", "step": 5867, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:28:09.004500", "step": 5867, "epoch": 3 }, { "type": "loss", "content": 0.00031185123953036964, "timestamp": "2025-09-10 02:28:09.031969", "step": 5868, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:28:09.064488", "step": 5868, "epoch": 3 }, { "type": "loss", "content": 7.067446131259203e-05, "timestamp": "2025-09-10 02:28:09.072320", "step": 5869, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:28:09.103897", "step": 5869, "epoch": 3 }, { "type": "loss", "content": 0.0012084973277524114, "timestamp": "2025-09-10 02:28:09.111514", "step": 5870, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:28:09.148832", "step": 5870, "epoch": 3 }, { "type": "loss", "content": 0.0005114732775837183, "timestamp": "2025-09-10 02:28:09.153362", "step": 5871, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:28:09.191671", "step": 5871, "epoch": 3 }, { "type": "loss", "content": 0.00019311138021294028, "timestamp": "2025-09-10 02:28:09.219678", "step": 5872, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:28:09.259004", "step": 5872, "epoch": 3 }, { "type": "loss", "content": 0.00104613380972296, "timestamp": "2025-09-10 02:28:09.264427", "step": 5873, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:28:09.300484", "step": 5873, "epoch": 3 }, { "type": "loss", "content": 6.62386228214018e-05, "timestamp": "2025-09-10 02:28:09.308028", "step": 5874, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:28:09.340490", "step": 5874, "epoch": 3 }, { "type": "loss", "content": 9.228465205524117e-05, "timestamp": "2025-09-10 02:28:09.350633", "step": 5875, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:28:09.382056", "step": 5875, "epoch": 3 }, { "type": "loss", "content": 7.762354653095827e-05, "timestamp": "2025-09-10 02:28:09.410773", "step": 5876, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:28:09.441904", "step": 5876, "epoch": 3 }, { "type": "loss", "content": 0.0005466092843562365, "timestamp": "2025-09-10 02:28:09.447342", "step": 5877, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:28:09.479585", "step": 5877, "epoch": 3 }, { "type": "loss", "content": 0.0009989795507863164, "timestamp": "2025-09-10 02:28:09.483748", "step": 5878, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:28:09.514404", "step": 5878, "epoch": 3 }, { "type": "loss", "content": 7.646583253517747e-05, "timestamp": "2025-09-10 02:28:09.521969", "step": 5879, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:28:09.552415", "step": 5879, "epoch": 3 }, { "type": "loss", "content": 0.0011240827152505517, "timestamp": "2025-09-10 02:28:09.577800", "step": 5880, "epoch": 3 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 736 ], "batch_size": 8, "flops": 14554433988352 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 480 ], "batch_size": 8, "flops": 9492022189824 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 544 ], "batch_size": 8, "flops": 10757625139456 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 624 ], "batch_size": 8, "flops": 12339628826496 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 576 ], "batch_size": 8, "flops": 11390426614272 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 432 ], "batch_size": 8, "flops": 8542819977600 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 432 ], "batch_size": 8, "flops": 8542819977600 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 128 ], "batch_size": 8, "flops": 2531205966848 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 544 ], "batch_size": 8, "flops": 10757625139456 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 656 ], "batch_size": 8, "flops": 12972430301312 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 128 ], "batch_size": 8, "flops": 2531205966848 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 464 ], "batch_size": 8, "flops": 9175621452416 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 560 ], "batch_size": 8, "flops": 11074025876864 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 576 ], "batch_size": 8, "flops": 11390426614272 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 1168 ], "batch_size": 8, "flops": 23097253898368 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 640 ], "batch_size": 8, "flops": 12656029563904 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 6, 208 ], "batch_size": 8, "flops": 4113209653888 } ], "timestamp": "2025-09-10 02:28:19.889322", "step": 5880, "epoch": 3 }, { "type": "pplx", "content": 23481078.320965376, "timestamp": "2025-09-10 02:28:19.892251", "step": 5880, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:28:19.922083", "step": 5880, "epoch": 3 }, { "type": "loss", "content": 0.00028165520052425563, "timestamp": "2025-09-10 02:28:19.926300", "step": 5881, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:28:19.957969", "step": 5881, "epoch": 3 }, { "type": "loss", "content": 0.0006952984258532524, "timestamp": "2025-09-10 02:28:19.967638", "step": 5882, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-10 02:28:20.007260", "step": 5882, "epoch": 3 }, { "type": "loss", "content": 0.0002651652612257749, "timestamp": "2025-09-10 02:28:20.023204", "step": 5883, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:28:20.053550", "step": 5883, "epoch": 3 }, { "type": "loss", "content": 0.00028942085918970406, "timestamp": "2025-09-10 02:28:20.078433", "step": 5884, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:28:20.109579", "step": 5884, "epoch": 3 }, { "type": "loss", "content": 5.7478438975522295e-05, "timestamp": "2025-09-10 02:28:20.114673", "step": 5885, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:28:20.145848", "step": 5885, "epoch": 3 }, { "type": "loss", "content": 9.486764611210674e-05, "timestamp": "2025-09-10 02:28:20.158042", "step": 5886, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:28:20.188926", "step": 5886, "epoch": 3 }, { "type": "loss", "content": 0.00015322092804126441, "timestamp": "2025-09-10 02:28:20.199824", "step": 5887, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:28:20.231887", "step": 5887, "epoch": 3 }, { "type": "loss", "content": 0.0002798614732455462, "timestamp": "2025-09-10 02:28:20.257063", "step": 5888, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 432 ], "flops": 12814563338304 }, "timestamp": "2025-09-10 02:28:20.293618", "step": 5888, "epoch": 3 }, { "type": "loss", "content": 0.011071518063545227, "timestamp": "2025-09-10 02:28:20.309306", "step": 5889, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:28:20.341402", "step": 5889, "epoch": 3 }, { "type": "loss", "content": 5.499214239534922e-05, "timestamp": "2025-09-10 02:28:20.352272", "step": 5890, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:28:20.383929", "step": 5890, "epoch": 3 }, { "type": "loss", "content": 0.00014012886094860733, "timestamp": "2025-09-10 02:28:20.396510", "step": 5891, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:28:20.427689", "step": 5891, "epoch": 3 }, { "type": "loss", "content": 0.0002186378842452541, "timestamp": "2025-09-10 02:28:20.453131", "step": 5892, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:28:20.485108", "step": 5892, "epoch": 3 }, { "type": "loss", "content": 0.00012612577120307833, "timestamp": "2025-09-10 02:28:20.490130", "step": 5893, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 144 ], "flops": 4271696270016 }, "timestamp": "2025-09-10 02:28:20.521235", "step": 5893, "epoch": 3 }, { "type": "loss", "content": 0.0001257530675502494, "timestamp": "2025-09-10 02:28:20.523900", "step": 5894, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:28:20.557348", "step": 5894, "epoch": 3 }, { "type": "loss", "content": 9.036817209562287e-05, "timestamp": "2025-09-10 02:28:20.561710", "step": 5895, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 512 ], "flops": 15187581968384 }, "timestamp": "2025-09-10 02:28:20.608903", "step": 5895, "epoch": 3 }, { "type": "loss", "content": 0.024240778759121895, "timestamp": "2025-09-10 02:28:20.647471", "step": 5896, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:28:20.684847", "step": 5896, "epoch": 3 }, { "type": "loss", "content": 7.944705430418253e-05, "timestamp": "2025-09-10 02:28:20.693630", "step": 5897, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:28:20.732919", "step": 5897, "epoch": 3 }, { "type": "loss", "content": 0.00011858268408104777, "timestamp": "2025-09-10 02:28:20.736885", "step": 5898, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:28:20.770947", "step": 5898, "epoch": 3 }, { "type": "loss", "content": 0.002331020077690482, "timestamp": "2025-09-10 02:28:20.777874", "step": 5899, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:28:20.813776", "step": 5899, "epoch": 3 }, { "type": "loss", "content": 0.00038511460297740996, "timestamp": "2025-09-10 02:28:20.841646", "step": 5900, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:28:20.878522", "step": 5900, "epoch": 3 }, { "type": "loss", "content": 0.00011608708882704377, "timestamp": "2025-09-10 02:28:20.886729", "step": 5901, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:28:20.917807", "step": 5901, "epoch": 3 }, { "type": "loss", "content": 0.002312576165422797, "timestamp": "2025-09-10 02:28:20.920438", "step": 5902, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:28:20.958793", "step": 5902, "epoch": 3 }, { "type": "loss", "content": 0.002418461488559842, "timestamp": "2025-09-10 02:28:20.965545", "step": 5903, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:28:20.997621", "step": 5903, "epoch": 3 }, { "type": "loss", "content": 0.0005505615263246, "timestamp": "2025-09-10 02:28:21.025586", "step": 5904, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:28:21.058667", "step": 5904, "epoch": 3 }, { "type": "loss", "content": 0.00045085299643687904, "timestamp": "2025-09-10 02:28:21.068386", "step": 5905, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:28:21.104652", "step": 5905, "epoch": 3 }, { "type": "loss", "content": 6.284094706643373e-05, "timestamp": "2025-09-10 02:28:21.111493", "step": 5906, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:28:21.145812", "step": 5906, "epoch": 3 }, { "type": "loss", "content": 0.0002078805264318362, "timestamp": "2025-09-10 02:28:21.159192", "step": 5907, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:28:21.199027", "step": 5907, "epoch": 3 }, { "type": "loss", "content": 0.03610233590006828, "timestamp": "2025-09-10 02:28:21.227763", "step": 5908, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 624 ], "flops": 18509808050496 }, "timestamp": "2025-09-10 02:28:21.276310", "step": 5908, "epoch": 3 }, { "type": "loss", "content": 0.0009849478956311941, "timestamp": "2025-09-10 02:28:21.298098", "step": 5909, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:28:21.334078", "step": 5909, "epoch": 3 }, { "type": "loss", "content": 0.00027651750133372843, "timestamp": "2025-09-10 02:28:21.344901", "step": 5910, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:28:21.378005", "step": 5910, "epoch": 3 }, { "type": "loss", "content": 0.0013139198999851942, "timestamp": "2025-09-10 02:28:21.385789", "step": 5911, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-10 02:28:21.446447", "step": 5911, "epoch": 3 }, { "type": "loss", "content": 0.00023808155674487352, "timestamp": "2025-09-10 02:28:21.482942", "step": 5912, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:28:21.530354", "step": 5912, "epoch": 3 }, { "type": "loss", "content": 0.000658250879496336, "timestamp": "2025-09-10 02:28:21.536351", "step": 5913, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:28:21.568875", "step": 5913, "epoch": 3 }, { "type": "loss", "content": 0.00039495486998930573, "timestamp": "2025-09-10 02:28:21.573402", "step": 5914, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:28:21.620099", "step": 5914, "epoch": 3 }, { "type": "loss", "content": 0.012653195299208164, "timestamp": "2025-09-10 02:28:21.633469", "step": 5915, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 464 ], "flops": 13763770790336 }, "timestamp": "2025-09-10 02:28:21.685057", "step": 5915, "epoch": 3 }, { "type": "loss", "content": 0.0027082362212240696, "timestamp": "2025-09-10 02:28:21.723071", "step": 5916, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-10 02:28:21.773382", "step": 5916, "epoch": 3 }, { "type": "loss", "content": 0.0002292887365911156, "timestamp": "2025-09-10 02:28:21.786732", "step": 5917, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:28:21.829150", "step": 5917, "epoch": 3 }, { "type": "loss", "content": 7.886863750172779e-05, "timestamp": "2025-09-10 02:28:21.836156", "step": 5918, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:28:21.874513", "step": 5918, "epoch": 3 }, { "type": "loss", "content": 0.01980876363813877, "timestamp": "2025-09-10 02:28:21.886855", "step": 5919, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:28:21.922524", "step": 5919, "epoch": 3 }, { "type": "loss", "content": 0.0007908547413535416, "timestamp": "2025-09-10 02:28:21.954458", "step": 5920, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:28:21.990423", "step": 5920, "epoch": 3 }, { "type": "loss", "content": 0.00044035873725079, "timestamp": "2025-09-10 02:28:21.998468", "step": 5921, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:28:22.031255", "step": 5921, "epoch": 3 }, { "type": "loss", "content": 0.00018272080342285335, "timestamp": "2025-09-10 02:28:22.043192", "step": 5922, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:28:22.084988", "step": 5922, "epoch": 3 }, { "type": "loss", "content": 0.02625429444015026, "timestamp": "2025-09-10 02:28:22.089699", "step": 5923, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:28:22.136258", "step": 5923, "epoch": 3 }, { "type": "loss", "content": 0.00017044544802047312, "timestamp": "2025-09-10 02:28:22.164393", "step": 5924, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:28:22.200917", "step": 5924, "epoch": 3 }, { "type": "loss", "content": 0.00031413830583915114, "timestamp": "2025-09-10 02:28:22.208548", "step": 5925, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:28:22.245116", "step": 5925, "epoch": 3 }, { "type": "loss", "content": 0.0015044523170217872, "timestamp": "2025-09-10 02:28:22.251901", "step": 5926, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:28:22.284117", "step": 5926, "epoch": 3 }, { "type": "loss", "content": 8.876120409695432e-05, "timestamp": "2025-09-10 02:28:22.291415", "step": 5927, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:28:22.322691", "step": 5927, "epoch": 3 }, { "type": "loss", "content": 0.0020055093336850405, "timestamp": "2025-09-10 02:28:22.350437", "step": 5928, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:28:22.386625", "step": 5928, "epoch": 3 }, { "type": "loss", "content": 0.002611653646454215, "timestamp": "2025-09-10 02:28:22.394203", "step": 5929, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:28:22.427632", "step": 5929, "epoch": 3 }, { "type": "loss", "content": 9.566867083776742e-05, "timestamp": "2025-09-10 02:28:22.437509", "step": 5930, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:28:22.469507", "step": 5930, "epoch": 3 }, { "type": "loss", "content": 0.00027315152692608535, "timestamp": "2025-09-10 02:28:22.473157", "step": 5931, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:28:22.506743", "step": 5931, "epoch": 3 }, { "type": "loss", "content": 0.00031735419179312885, "timestamp": "2025-09-10 02:28:22.536072", "step": 5932, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:28:22.569249", "step": 5932, "epoch": 3 }, { "type": "loss", "content": 0.0007971972227096558, "timestamp": "2025-09-10 02:28:22.574548", "step": 5933, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:28:22.609284", "step": 5933, "epoch": 3 }, { "type": "loss", "content": 0.0020833786111325026, "timestamp": "2025-09-10 02:28:22.616835", "step": 5934, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:28:22.650154", "step": 5934, "epoch": 3 }, { "type": "loss", "content": 0.014346832409501076, "timestamp": "2025-09-10 02:28:22.652891", "step": 5935, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:28:22.684922", "step": 5935, "epoch": 3 }, { "type": "loss", "content": 0.00011734214058378711, "timestamp": "2025-09-10 02:28:22.709802", "step": 5936, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:28:22.742162", "step": 5936, "epoch": 3 }, { "type": "loss", "content": 0.0005824709078297019, "timestamp": "2025-09-10 02:28:22.747280", "step": 5937, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:28:22.784761", "step": 5937, "epoch": 3 }, { "type": "loss", "content": 0.0002061406703433022, "timestamp": "2025-09-10 02:28:22.797027", "step": 5938, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:28:22.834515", "step": 5938, "epoch": 3 }, { "type": "loss", "content": 0.000224357980187051, "timestamp": "2025-09-10 02:28:22.838521", "step": 5939, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:28:22.874457", "step": 5939, "epoch": 3 }, { "type": "loss", "content": 0.0007039483753032982, "timestamp": "2025-09-10 02:28:22.907412", "step": 5940, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:28:22.953965", "step": 5940, "epoch": 3 }, { "type": "loss", "content": 0.00016574481560382992, "timestamp": "2025-09-10 02:28:22.958355", "step": 5941, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:28:22.990183", "step": 5941, "epoch": 3 }, { "type": "loss", "content": 0.0001796074939193204, "timestamp": "2025-09-10 02:28:22.994743", "step": 5942, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:28:23.028030", "step": 5942, "epoch": 3 }, { "type": "loss", "content": 0.00036119503783993423, "timestamp": "2025-09-10 02:28:23.037850", "step": 5943, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:28:23.072595", "step": 5943, "epoch": 3 }, { "type": "loss", "content": 9.575783769832924e-05, "timestamp": "2025-09-10 02:28:23.100766", "step": 5944, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:28:23.132544", "step": 5944, "epoch": 3 }, { "type": "loss", "content": 0.00012964828056283295, "timestamp": "2025-09-10 02:28:23.137289", "step": 5945, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:28:23.172581", "step": 5945, "epoch": 3 }, { "type": "loss", "content": 0.00028890607063658535, "timestamp": "2025-09-10 02:28:23.184799", "step": 5946, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:28:23.219483", "step": 5946, "epoch": 3 }, { "type": "loss", "content": 0.0002000013628276065, "timestamp": "2025-09-10 02:28:23.226663", "step": 5947, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:28:23.264835", "step": 5947, "epoch": 3 }, { "type": "loss", "content": 0.0011124643497169018, "timestamp": "2025-09-10 02:28:23.289841", "step": 5948, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:28:23.320747", "step": 5948, "epoch": 3 }, { "type": "loss", "content": 0.000164168028277345, "timestamp": "2025-09-10 02:28:23.325561", "step": 5949, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:28:23.357978", "step": 5949, "epoch": 3 }, { "type": "loss", "content": 0.00011684057244565338, "timestamp": "2025-09-10 02:28:23.365046", "step": 5950, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:28:23.395277", "step": 5950, "epoch": 3 }, { "type": "loss", "content": 9.600551129551604e-05, "timestamp": "2025-09-10 02:28:23.405621", "step": 5951, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:28:23.436969", "step": 5951, "epoch": 3 }, { "type": "loss", "content": 0.0005512385396286845, "timestamp": "2025-09-10 02:28:23.470138", "step": 5952, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:28:23.502824", "step": 5952, "epoch": 3 }, { "type": "loss", "content": 7.65290460549295e-05, "timestamp": "2025-09-10 02:28:23.511503", "step": 5953, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:28:23.559888", "step": 5953, "epoch": 3 }, { "type": "loss", "content": 0.00011048233864130452, "timestamp": "2025-09-10 02:28:23.566592", "step": 5954, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:28:23.604284", "step": 5954, "epoch": 3 }, { "type": "loss", "content": 0.00010653473873389885, "timestamp": "2025-09-10 02:28:23.612019", "step": 5955, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:28:23.652712", "step": 5955, "epoch": 3 }, { "type": "loss", "content": 4.3004063627449796e-05, "timestamp": "2025-09-10 02:28:23.680551", "step": 5956, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:28:23.711182", "step": 5956, "epoch": 3 }, { "type": "loss", "content": 0.00041585671715438366, "timestamp": "2025-09-10 02:28:23.715771", "step": 5957, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:28:23.747079", "step": 5957, "epoch": 3 }, { "type": "loss", "content": 0.0007976609631441534, "timestamp": "2025-09-10 02:28:23.754701", "step": 5958, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:28:23.796867", "step": 5958, "epoch": 3 }, { "type": "loss", "content": 0.0035458316560834646, "timestamp": "2025-09-10 02:28:23.803781", "step": 5959, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:28:23.835245", "step": 5959, "epoch": 3 }, { "type": "loss", "content": 0.00028361781733110547, "timestamp": "2025-09-10 02:28:23.863166", "step": 5960, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:28:23.894704", "step": 5960, "epoch": 3 }, { "type": "loss", "content": 0.00024366001889575273, "timestamp": "2025-09-10 02:28:23.899409", "step": 5961, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:28:23.930501", "step": 5961, "epoch": 3 }, { "type": "loss", "content": 0.00015509971126448363, "timestamp": "2025-09-10 02:28:23.938150", "step": 5962, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:28:23.975810", "step": 5962, "epoch": 3 }, { "type": "loss", "content": 0.0010104191023856401, "timestamp": "2025-09-10 02:28:23.979823", "step": 5963, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-10 02:28:24.029868", "step": 5963, "epoch": 3 }, { "type": "loss", "content": 0.00019829573284368962, "timestamp": "2025-09-10 02:28:24.066464", "step": 5964, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:28:24.105446", "step": 5964, "epoch": 3 }, { "type": "loss", "content": 9.516144200460985e-05, "timestamp": "2025-09-10 02:28:24.112665", "step": 5965, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:28:24.144653", "step": 5965, "epoch": 3 }, { "type": "loss", "content": 0.0004203191492706537, "timestamp": "2025-09-10 02:28:24.151383", "step": 5966, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:28:24.185517", "step": 5966, "epoch": 3 }, { "type": "loss", "content": 0.013540414161980152, "timestamp": "2025-09-10 02:28:24.197692", "step": 5967, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:28:24.235833", "step": 5967, "epoch": 3 }, { "type": "loss", "content": 0.0008812797605060041, "timestamp": "2025-09-10 02:28:24.267088", "step": 5968, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:28:24.300779", "step": 5968, "epoch": 3 }, { "type": "loss", "content": 0.00030735571635887027, "timestamp": "2025-09-10 02:28:24.322539", "step": 5969, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:28:24.361963", "step": 5969, "epoch": 3 }, { "type": "loss", "content": 7.28549639461562e-05, "timestamp": "2025-09-10 02:28:24.368722", "step": 5970, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:28:24.402502", "step": 5970, "epoch": 3 }, { "type": "loss", "content": 0.0002149459905922413, "timestamp": "2025-09-10 02:28:24.410284", "step": 5971, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:28:24.445827", "step": 5971, "epoch": 3 }, { "type": "loss", "content": 0.0018557047005742788, "timestamp": "2025-09-10 02:28:24.472269", "step": 5972, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:28:24.507841", "step": 5972, "epoch": 3 }, { "type": "loss", "content": 0.00019703614816535264, "timestamp": "2025-09-10 02:28:24.510013", "step": 5973, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:28:24.550589", "step": 5973, "epoch": 3 }, { "type": "loss", "content": 0.000475127570098266, "timestamp": "2025-09-10 02:28:24.557956", "step": 5974, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:28:24.592600", "step": 5974, "epoch": 3 }, { "type": "loss", "content": 0.00028725885204039514, "timestamp": "2025-09-10 02:28:24.605998", "step": 5975, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-10 02:28:24.645424", "step": 5975, "epoch": 3 }, { "type": "loss", "content": 0.0018704243702813983, "timestamp": "2025-09-10 02:28:24.682214", "step": 5976, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:28:24.713623", "step": 5976, "epoch": 3 }, { "type": "loss", "content": 0.0017203286988660693, "timestamp": "2025-09-10 02:28:24.721879", "step": 5977, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:28:24.755433", "step": 5977, "epoch": 3 }, { "type": "loss", "content": 0.00017131041386164725, "timestamp": "2025-09-10 02:28:24.762523", "step": 5978, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:28:24.800064", "step": 5978, "epoch": 3 }, { "type": "loss", "content": 0.0002911267220042646, "timestamp": "2025-09-10 02:28:24.806746", "step": 5979, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:28:24.839679", "step": 5979, "epoch": 3 }, { "type": "loss", "content": 0.001717909937724471, "timestamp": "2025-09-10 02:28:24.871236", "step": 5980, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:28:24.904187", "step": 5980, "epoch": 3 }, { "type": "loss", "content": 0.00016485284140799195, "timestamp": "2025-09-10 02:28:24.914518", "step": 5981, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:28:24.945807", "step": 5981, "epoch": 3 }, { "type": "loss", "content": 0.001138357212767005, "timestamp": "2025-09-10 02:28:24.956643", "step": 5982, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:28:24.997444", "step": 5982, "epoch": 3 }, { "type": "loss", "content": 0.0005265086074359715, "timestamp": "2025-09-10 02:28:25.009992", "step": 5983, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:28:25.045539", "step": 5983, "epoch": 3 }, { "type": "loss", "content": 0.0021492692176252604, "timestamp": "2025-09-10 02:28:25.073142", "step": 5984, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:28:25.111871", "step": 5984, "epoch": 3 }, { "type": "loss", "content": 0.0007208751630969346, "timestamp": "2025-09-10 02:28:25.120136", "step": 5985, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 512 ], "flops": 15187581968384 }, "timestamp": "2025-09-10 02:28:25.163502", "step": 5985, "epoch": 3 }, { "type": "loss", "content": 0.0015368768945336342, "timestamp": "2025-09-10 02:28:25.181153", "step": 5986, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-10 02:28:25.219981", "step": 5986, "epoch": 3 }, { "type": "loss", "content": 6.340054824249819e-05, "timestamp": "2025-09-10 02:28:25.235639", "step": 5987, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:28:25.269918", "step": 5987, "epoch": 3 }, { "type": "loss", "content": 0.0003260863886680454, "timestamp": "2025-09-10 02:28:25.294827", "step": 5988, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:28:25.332469", "step": 5988, "epoch": 3 }, { "type": "loss", "content": 0.00010763067984953523, "timestamp": "2025-09-10 02:28:25.336705", "step": 5989, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:28:25.368246", "step": 5989, "epoch": 3 }, { "type": "loss", "content": 0.00016242521815001965, "timestamp": "2025-09-10 02:28:25.372341", "step": 5990, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:28:25.412530", "step": 5990, "epoch": 3 }, { "type": "loss", "content": 0.00010946859401883557, "timestamp": "2025-09-10 02:28:25.416923", "step": 5991, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:28:25.457416", "step": 5991, "epoch": 3 }, { "type": "loss", "content": 0.0001512065064162016, "timestamp": "2025-09-10 02:28:25.488859", "step": 5992, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:28:25.526786", "step": 5992, "epoch": 3 }, { "type": "loss", "content": 0.00014350096171256155, "timestamp": "2025-09-10 02:28:25.531078", "step": 5993, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:28:25.566643", "step": 5993, "epoch": 3 }, { "type": "loss", "content": 0.0001927161356434226, "timestamp": "2025-09-10 02:28:25.570732", "step": 5994, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 144 ], "flops": 4271696270016 }, "timestamp": "2025-09-10 02:28:25.602996", "step": 5994, "epoch": 3 }, { "type": "loss", "content": 0.0001801040634745732, "timestamp": "2025-09-10 02:28:25.606374", "step": 5995, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:28:25.645401", "step": 5995, "epoch": 3 }, { "type": "loss", "content": 0.0004571221652440727, "timestamp": "2025-09-10 02:28:25.675773", "step": 5996, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-10 02:28:25.711063", "step": 5996, "epoch": 3 }, { "type": "loss", "content": 0.0004833031562156975, "timestamp": "2025-09-10 02:28:25.724210", "step": 5997, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:28:25.758522", "step": 5997, "epoch": 3 }, { "type": "loss", "content": 8.242072362918407e-05, "timestamp": "2025-09-10 02:28:25.765098", "step": 5998, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:28:25.804980", "step": 5998, "epoch": 3 }, { "type": "loss", "content": 0.0008570431964471936, "timestamp": "2025-09-10 02:28:25.811600", "step": 5999, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:28:25.844995", "step": 5999, "epoch": 3 }, { "type": "loss", "content": 0.0004085947584826499, "timestamp": "2025-09-10 02:28:25.877238", "step": 6000, "epoch": 3 }, { "type": "info", "content": "Checkpoint saved at step 6000", "timestamp": "2025-09-10 02:28:30.932741", "step": 6000, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:28:30.970145", "step": 6000, "epoch": 3 }, { "type": "loss", "content": 8.179119322448969e-05, "timestamp": "2025-09-10 02:28:30.977508", "step": 6001, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:28:31.010049", "step": 6001, "epoch": 3 }, { "type": "loss", "content": 0.05771319940686226, "timestamp": "2025-09-10 02:28:31.022181", "step": 6002, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:28:31.064762", "step": 6002, "epoch": 3 }, { "type": "loss", "content": 0.00022734318918082863, "timestamp": "2025-09-10 02:28:31.077998", "step": 6003, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:28:31.112251", "step": 6003, "epoch": 3 }, { "type": "loss", "content": 0.00017205321637447923, "timestamp": "2025-09-10 02:28:31.139626", "step": 6004, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:28:31.173783", "step": 6004, "epoch": 3 }, { "type": "loss", "content": 9.907536150421947e-05, "timestamp": "2025-09-10 02:28:31.181115", "step": 6005, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:28:31.225328", "step": 6005, "epoch": 3 }, { "type": "loss", "content": 0.00015806824376340955, "timestamp": "2025-09-10 02:28:31.228995", "step": 6006, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:28:31.263698", "step": 6006, "epoch": 3 }, { "type": "loss", "content": 0.005198474042117596, "timestamp": "2025-09-10 02:28:31.273239", "step": 6007, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:28:31.306749", "step": 6007, "epoch": 3 }, { "type": "loss", "content": 0.0007690453785471618, "timestamp": "2025-09-10 02:28:31.337819", "step": 6008, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:28:31.378571", "step": 6008, "epoch": 3 }, { "type": "loss", "content": 0.00019263003196101636, "timestamp": "2025-09-10 02:28:31.382186", "step": 6009, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:28:31.431013", "step": 6009, "epoch": 3 }, { "type": "loss", "content": 0.044559430330991745, "timestamp": "2025-09-10 02:28:31.438377", "step": 6010, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-10 02:28:31.488260", "step": 6010, "epoch": 3 }, { "type": "loss", "content": 0.00024100964947137982, "timestamp": "2025-09-10 02:28:31.502012", "step": 6011, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:28:31.535576", "step": 6011, "epoch": 3 }, { "type": "loss", "content": 0.00010909455158980563, "timestamp": "2025-09-10 02:28:31.566772", "step": 6012, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:28:31.603718", "step": 6012, "epoch": 3 }, { "type": "loss", "content": 0.0001878739712992683, "timestamp": "2025-09-10 02:28:31.606235", "step": 6013, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:28:31.640132", "step": 6013, "epoch": 3 }, { "type": "loss", "content": 0.00019220814283471555, "timestamp": "2025-09-10 02:28:31.643888", "step": 6014, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:28:31.677008", "step": 6014, "epoch": 3 }, { "type": "loss", "content": 0.00022207196161616594, "timestamp": "2025-09-10 02:28:31.688506", "step": 6015, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 464 ], "flops": 13763770790336 }, "timestamp": "2025-09-10 02:28:31.747631", "step": 6015, "epoch": 3 }, { "type": "loss", "content": 0.0015352964401245117, "timestamp": "2025-09-10 02:28:31.785608", "step": 6016, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:28:31.829619", "step": 6016, "epoch": 3 }, { "type": "loss", "content": 0.0025681753177195787, "timestamp": "2025-09-10 02:28:31.834054", "step": 6017, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:28:31.866562", "step": 6017, "epoch": 3 }, { "type": "loss", "content": 8.73608369147405e-05, "timestamp": "2025-09-10 02:28:31.873272", "step": 6018, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-10 02:28:31.913411", "step": 6018, "epoch": 3 }, { "type": "loss", "content": 0.0003728985320776701, "timestamp": "2025-09-10 02:28:31.929287", "step": 6019, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 144 ], "flops": 4271696270016 }, "timestamp": "2025-09-10 02:28:31.967247", "step": 6019, "epoch": 3 }, { "type": "loss", "content": 0.00013485472300089896, "timestamp": "2025-09-10 02:28:31.991264", "step": 6020, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:28:32.024606", "step": 6020, "epoch": 3 }, { "type": "loss", "content": 0.0008274485589936376, "timestamp": "2025-09-10 02:28:32.031550", "step": 6021, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:28:32.066679", "step": 6021, "epoch": 3 }, { "type": "loss", "content": 0.00025299013941548765, "timestamp": "2025-09-10 02:28:32.080063", "step": 6022, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:28:32.113274", "step": 6022, "epoch": 3 }, { "type": "loss", "content": 0.0001833633432397619, "timestamp": "2025-09-10 02:28:32.120704", "step": 6023, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:28:32.160564", "step": 6023, "epoch": 3 }, { "type": "loss", "content": 7.197562081273645e-05, "timestamp": "2025-09-10 02:28:32.186162", "step": 6024, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:28:32.219417", "step": 6024, "epoch": 3 }, { "type": "loss", "content": 0.00024802552070468664, "timestamp": "2025-09-10 02:28:32.224460", "step": 6025, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:28:32.281475", "step": 6025, "epoch": 3 }, { "type": "loss", "content": 0.000184178032213822, "timestamp": "2025-09-10 02:28:32.291170", "step": 6026, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:28:32.323527", "step": 6026, "epoch": 3 }, { "type": "loss", "content": 0.015562635846436024, "timestamp": "2025-09-10 02:28:32.334078", "step": 6027, "epoch": 3 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 736 ], "batch_size": 8, "flops": 14554433988352 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 480 ], "batch_size": 8, "flops": 9492022189824 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 544 ], "batch_size": 8, "flops": 10757625139456 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 624 ], "batch_size": 8, "flops": 12339628826496 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 576 ], "batch_size": 8, "flops": 11390426614272 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 432 ], "batch_size": 8, "flops": 8542819977600 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 432 ], "batch_size": 8, "flops": 8542819977600 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 128 ], "batch_size": 8, "flops": 2531205966848 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 544 ], "batch_size": 8, "flops": 10757625139456 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 656 ], "batch_size": 8, "flops": 12972430301312 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 128 ], "batch_size": 8, "flops": 2531205966848 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 464 ], "batch_size": 8, "flops": 9175621452416 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 560 ], "batch_size": 8, "flops": 11074025876864 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 576 ], "batch_size": 8, "flops": 11390426614272 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 1168 ], "batch_size": 8, "flops": 23097253898368 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 640 ], "batch_size": 8, "flops": 12656029563904 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 6, 208 ], "batch_size": 8, "flops": 4113209653888 } ], "timestamp": "2025-09-10 02:28:42.928241", "step": 6027, "epoch": 3 }, { "type": "pplx", "content": 24656336.660595033, "timestamp": "2025-09-10 02:28:42.931831", "step": 6027, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:28:42.964536", "step": 6027, "epoch": 3 }, { "type": "loss", "content": 0.0004599474195856601, "timestamp": "2025-09-10 02:28:42.996271", "step": 6028, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:28:43.041688", "step": 6028, "epoch": 3 }, { "type": "loss", "content": 0.0006434383685700595, "timestamp": "2025-09-10 02:28:43.052048", "step": 6029, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:28:43.099411", "step": 6029, "epoch": 3 }, { "type": "loss", "content": 0.0006290274322964251, "timestamp": "2025-09-10 02:28:43.109959", "step": 6030, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-10 02:28:43.149728", "step": 6030, "epoch": 3 }, { "type": "loss", "content": 0.0011963268043473363, "timestamp": "2025-09-10 02:28:43.163589", "step": 6031, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 432 ], "flops": 12814563338304 }, "timestamp": "2025-09-10 02:28:43.211356", "step": 6031, "epoch": 3 }, { "type": "loss", "content": 0.00023444702674169093, "timestamp": "2025-09-10 02:28:43.248438", "step": 6032, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:28:43.285507", "step": 6032, "epoch": 3 }, { "type": "loss", "content": 0.0004736782575491816, "timestamp": "2025-09-10 02:28:43.290490", "step": 6033, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:28:43.330232", "step": 6033, "epoch": 3 }, { "type": "loss", "content": 0.0014104725560173392, "timestamp": "2025-09-10 02:28:43.342832", "step": 6034, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:28:43.376100", "step": 6034, "epoch": 3 }, { "type": "loss", "content": 0.014761857688426971, "timestamp": "2025-09-10 02:28:43.386327", "step": 6035, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:28:43.427628", "step": 6035, "epoch": 3 }, { "type": "loss", "content": 0.0003512998518999666, "timestamp": "2025-09-10 02:28:43.457347", "step": 6036, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:28:43.505904", "step": 6036, "epoch": 3 }, { "type": "loss", "content": 0.0010449119145050645, "timestamp": "2025-09-10 02:28:43.516285", "step": 6037, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:28:43.558636", "step": 6037, "epoch": 3 }, { "type": "loss", "content": 0.00028321417630650103, "timestamp": "2025-09-10 02:28:43.572055", "step": 6038, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:28:43.605389", "step": 6038, "epoch": 3 }, { "type": "loss", "content": 0.004012010060250759, "timestamp": "2025-09-10 02:28:43.617264", "step": 6039, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:28:43.652786", "step": 6039, "epoch": 3 }, { "type": "loss", "content": 0.0009143882198259234, "timestamp": "2025-09-10 02:28:43.684521", "step": 6040, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:28:43.717671", "step": 6040, "epoch": 3 }, { "type": "loss", "content": 0.0016910507110878825, "timestamp": "2025-09-10 02:28:43.730404", "step": 6041, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:28:43.765385", "step": 6041, "epoch": 3 }, { "type": "loss", "content": 0.007233879994601011, "timestamp": "2025-09-10 02:28:43.772972", "step": 6042, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:28:43.805267", "step": 6042, "epoch": 3 }, { "type": "loss", "content": 0.0003337309753987938, "timestamp": "2025-09-10 02:28:43.812135", "step": 6043, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:28:43.857425", "step": 6043, "epoch": 3 }, { "type": "loss", "content": 0.001774253905750811, "timestamp": "2025-09-10 02:28:43.890870", "step": 6044, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:28:43.926016", "step": 6044, "epoch": 3 }, { "type": "loss", "content": 0.009199073538184166, "timestamp": "2025-09-10 02:28:43.938684", "step": 6045, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:28:43.973721", "step": 6045, "epoch": 3 }, { "type": "loss", "content": 0.00048229689127765596, "timestamp": "2025-09-10 02:28:43.978131", "step": 6046, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:28:44.010245", "step": 6046, "epoch": 3 }, { "type": "loss", "content": 0.0005172424134798348, "timestamp": "2025-09-10 02:28:44.022269", "step": 6047, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:28:44.056102", "step": 6047, "epoch": 3 }, { "type": "loss", "content": 0.009047497995197773, "timestamp": "2025-09-10 02:28:44.088040", "step": 6048, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:28:44.140827", "step": 6048, "epoch": 3 }, { "type": "loss", "content": 0.0007229651673696935, "timestamp": "2025-09-10 02:28:44.153855", "step": 6049, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:28:44.205343", "step": 6049, "epoch": 3 }, { "type": "loss", "content": 0.0003836154646705836, "timestamp": "2025-09-10 02:28:44.218696", "step": 6050, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:28:44.257047", "step": 6050, "epoch": 3 }, { "type": "loss", "content": 0.00197161384858191, "timestamp": "2025-09-10 02:28:44.264179", "step": 6051, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:28:44.300441", "step": 6051, "epoch": 3 }, { "type": "loss", "content": 0.01630636677145958, "timestamp": "2025-09-10 02:28:44.328369", "step": 6052, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:28:44.364877", "step": 6052, "epoch": 3 }, { "type": "loss", "content": 6.930591916898265e-05, "timestamp": "2025-09-10 02:28:44.369520", "step": 6053, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:28:44.409417", "step": 6053, "epoch": 3 }, { "type": "loss", "content": 0.00038816872984170914, "timestamp": "2025-09-10 02:28:44.421684", "step": 6054, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:28:44.456166", "step": 6054, "epoch": 3 }, { "type": "loss", "content": 0.0002050340553978458, "timestamp": "2025-09-10 02:28:44.466586", "step": 6055, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:28:44.515033", "step": 6055, "epoch": 3 }, { "type": "loss", "content": 0.00029212163644842803, "timestamp": "2025-09-10 02:28:44.546306", "step": 6056, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:28:44.591369", "step": 6056, "epoch": 3 }, { "type": "loss", "content": 0.00010645970905898139, "timestamp": "2025-09-10 02:28:44.597363", "step": 6057, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:28:44.633439", "step": 6057, "epoch": 3 }, { "type": "loss", "content": 0.0017570939380675554, "timestamp": "2025-09-10 02:28:44.645838", "step": 6058, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:28:44.682235", "step": 6058, "epoch": 3 }, { "type": "loss", "content": 0.00048702204367145896, "timestamp": "2025-09-10 02:28:44.686480", "step": 6059, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:28:44.718445", "step": 6059, "epoch": 3 }, { "type": "loss", "content": 0.00012442604929674417, "timestamp": "2025-09-10 02:28:44.750222", "step": 6060, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:28:44.785080", "step": 6060, "epoch": 3 }, { "type": "loss", "content": 0.0001539927179692313, "timestamp": "2025-09-10 02:28:44.789559", "step": 6061, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:28:44.832176", "step": 6061, "epoch": 3 }, { "type": "loss", "content": 0.001589708379469812, "timestamp": "2025-09-10 02:28:44.845571", "step": 6062, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:28:44.898514", "step": 6062, "epoch": 3 }, { "type": "loss", "content": 8.440674719167873e-05, "timestamp": "2025-09-10 02:28:44.908974", "step": 6063, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:28:44.945327", "step": 6063, "epoch": 3 }, { "type": "loss", "content": 0.022429468110203743, "timestamp": "2025-09-10 02:28:44.973142", "step": 6064, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:28:45.007027", "step": 6064, "epoch": 3 }, { "type": "loss", "content": 0.0002527502947486937, "timestamp": "2025-09-10 02:28:45.017259", "step": 6065, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-10 02:28:45.056236", "step": 6065, "epoch": 3 }, { "type": "loss", "content": 0.001043917378410697, "timestamp": "2025-09-10 02:28:45.070101", "step": 6066, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:28:45.102885", "step": 6066, "epoch": 3 }, { "type": "loss", "content": 0.00024156781728379428, "timestamp": "2025-09-10 02:28:45.115456", "step": 6067, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:28:45.148757", "step": 6067, "epoch": 3 }, { "type": "loss", "content": 8.486958540743217e-05, "timestamp": "2025-09-10 02:28:45.179988", "step": 6068, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:28:45.214966", "step": 6068, "epoch": 3 }, { "type": "loss", "content": 0.00023520128161180764, "timestamp": "2025-09-10 02:28:45.219084", "step": 6069, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:28:45.255633", "step": 6069, "epoch": 3 }, { "type": "loss", "content": 0.0004728248168248683, "timestamp": "2025-09-10 02:28:45.262485", "step": 6070, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-10 02:28:45.306098", "step": 6070, "epoch": 3 }, { "type": "loss", "content": 0.0002277484891237691, "timestamp": "2025-09-10 02:28:45.319908", "step": 6071, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:28:45.354726", "step": 6071, "epoch": 3 }, { "type": "loss", "content": 5.23619819432497e-05, "timestamp": "2025-09-10 02:28:45.385846", "step": 6072, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:28:45.424762", "step": 6072, "epoch": 3 }, { "type": "loss", "content": 0.00031708512688055634, "timestamp": "2025-09-10 02:28:45.435251", "step": 6073, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:28:45.468626", "step": 6073, "epoch": 3 }, { "type": "loss", "content": 0.0004512048908509314, "timestamp": "2025-09-10 02:28:45.475803", "step": 6074, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:28:45.510771", "step": 6074, "epoch": 3 }, { "type": "loss", "content": 0.00019133243768010288, "timestamp": "2025-09-10 02:28:45.520955", "step": 6075, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:28:45.563511", "step": 6075, "epoch": 3 }, { "type": "loss", "content": 0.0029035231564193964, "timestamp": "2025-09-10 02:28:45.596965", "step": 6076, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:28:45.630230", "step": 6076, "epoch": 3 }, { "type": "loss", "content": 0.0011966234305873513, "timestamp": "2025-09-10 02:28:45.643233", "step": 6077, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:28:45.678334", "step": 6077, "epoch": 3 }, { "type": "loss", "content": 3.068596561206505e-05, "timestamp": "2025-09-10 02:28:45.688687", "step": 6078, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-10 02:28:45.727400", "step": 6078, "epoch": 3 }, { "type": "loss", "content": 0.00018004176672548056, "timestamp": "2025-09-10 02:28:45.741205", "step": 6079, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:28:45.792778", "step": 6079, "epoch": 3 }, { "type": "loss", "content": 0.00031091499840840697, "timestamp": "2025-09-10 02:28:45.821496", "step": 6080, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:28:45.855903", "step": 6080, "epoch": 3 }, { "type": "loss", "content": 0.006474101450294256, "timestamp": "2025-09-10 02:28:45.863049", "step": 6081, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:28:45.900461", "step": 6081, "epoch": 3 }, { "type": "loss", "content": 0.0032853742595762014, "timestamp": "2025-09-10 02:28:45.910994", "step": 6082, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:28:45.947133", "step": 6082, "epoch": 3 }, { "type": "loss", "content": 5.7726305385585874e-05, "timestamp": "2025-09-10 02:28:45.954240", "step": 6083, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:28:45.992236", "step": 6083, "epoch": 3 }, { "type": "loss", "content": 0.00022245707805268466, "timestamp": "2025-09-10 02:28:46.026454", "step": 6084, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:28:46.060512", "step": 6084, "epoch": 3 }, { "type": "loss", "content": 0.000945181876886636, "timestamp": "2025-09-10 02:28:46.068454", "step": 6085, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:28:46.103091", "step": 6085, "epoch": 3 }, { "type": "loss", "content": 0.004430218134075403, "timestamp": "2025-09-10 02:28:46.116492", "step": 6086, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:28:46.151682", "step": 6086, "epoch": 3 }, { "type": "loss", "content": 4.371793329482898e-05, "timestamp": "2025-09-10 02:28:46.156048", "step": 6087, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:28:46.191776", "step": 6087, "epoch": 3 }, { "type": "loss", "content": 0.00032512666075490415, "timestamp": "2025-09-10 02:28:46.220288", "step": 6088, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:28:46.253403", "step": 6088, "epoch": 3 }, { "type": "loss", "content": 0.00032571834162808955, "timestamp": "2025-09-10 02:28:46.258905", "step": 6089, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:28:46.294231", "step": 6089, "epoch": 3 }, { "type": "loss", "content": 0.016125816851854324, "timestamp": "2025-09-10 02:28:46.305045", "step": 6090, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:28:46.336814", "step": 6090, "epoch": 3 }, { "type": "loss", "content": 2.697331365197897e-05, "timestamp": "2025-09-10 02:28:46.339693", "step": 6091, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:28:46.378084", "step": 6091, "epoch": 3 }, { "type": "loss", "content": 0.0005751307471655309, "timestamp": "2025-09-10 02:28:46.409404", "step": 6092, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:28:46.450036", "step": 6092, "epoch": 3 }, { "type": "loss", "content": 4.076838376931846e-05, "timestamp": "2025-09-10 02:28:46.456193", "step": 6093, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 624 ], "flops": 18509808050496 }, "timestamp": "2025-09-10 02:28:46.508832", "step": 6093, "epoch": 3 }, { "type": "loss", "content": 0.0009186511742882431, "timestamp": "2025-09-10 02:28:46.530588", "step": 6094, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:28:46.562802", "step": 6094, "epoch": 3 }, { "type": "loss", "content": 0.006315763108432293, "timestamp": "2025-09-10 02:28:46.569897", "step": 6095, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:28:46.600861", "step": 6095, "epoch": 3 }, { "type": "loss", "content": 0.00013419199967756867, "timestamp": "2025-09-10 02:28:46.625650", "step": 6096, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 480 ], "flops": 14238374516352 }, "timestamp": "2025-09-10 02:28:46.665746", "step": 6096, "epoch": 3 }, { "type": "loss", "content": 0.0002742501674219966, "timestamp": "2025-09-10 02:28:46.682715", "step": 6097, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:28:46.720741", "step": 6097, "epoch": 3 }, { "type": "loss", "content": 0.0033741388469934464, "timestamp": "2025-09-10 02:28:46.733329", "step": 6098, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:28:46.764850", "step": 6098, "epoch": 3 }, { "type": "loss", "content": 3.133829522994347e-05, "timestamp": "2025-09-10 02:28:46.771679", "step": 6099, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:28:46.811405", "step": 6099, "epoch": 3 }, { "type": "loss", "content": 4.3357093090889975e-05, "timestamp": "2025-09-10 02:28:46.835866", "step": 6100, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:28:46.875977", "step": 6100, "epoch": 3 }, { "type": "loss", "content": 0.00026234795222990215, "timestamp": "2025-09-10 02:28:46.884317", "step": 6101, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:28:46.923251", "step": 6101, "epoch": 3 }, { "type": "loss", "content": 0.0012434854870662093, "timestamp": "2025-09-10 02:28:46.927769", "step": 6102, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:28:46.960500", "step": 6102, "epoch": 3 }, { "type": "loss", "content": 0.021313535049557686, "timestamp": "2025-09-10 02:28:46.967469", "step": 6103, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:28:47.000997", "step": 6103, "epoch": 3 }, { "type": "loss", "content": 0.003981561399996281, "timestamp": "2025-09-10 02:28:47.028737", "step": 6104, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:28:47.065770", "step": 6104, "epoch": 3 }, { "type": "loss", "content": 0.00014674547128379345, "timestamp": "2025-09-10 02:28:47.074269", "step": 6105, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:28:47.109901", "step": 6105, "epoch": 3 }, { "type": "loss", "content": 0.0017406666884198785, "timestamp": "2025-09-10 02:28:47.112275", "step": 6106, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:28:47.142972", "step": 6106, "epoch": 3 }, { "type": "loss", "content": 0.0001538008509669453, "timestamp": "2025-09-10 02:28:47.150025", "step": 6107, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:28:47.184350", "step": 6107, "epoch": 3 }, { "type": "loss", "content": 0.0009951989632099867, "timestamp": "2025-09-10 02:28:47.217782", "step": 6108, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-10 02:28:47.257856", "step": 6108, "epoch": 3 }, { "type": "loss", "content": 0.0021144095808267593, "timestamp": "2025-09-10 02:28:47.273336", "step": 6109, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:28:47.307354", "step": 6109, "epoch": 3 }, { "type": "loss", "content": 0.0008319019107148051, "timestamp": "2025-09-10 02:28:47.317527", "step": 6110, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:28:47.353515", "step": 6110, "epoch": 3 }, { "type": "loss", "content": 0.00018621633353177458, "timestamp": "2025-09-10 02:28:47.358189", "step": 6111, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:28:47.398595", "step": 6111, "epoch": 3 }, { "type": "loss", "content": 0.03902193531394005, "timestamp": "2025-09-10 02:28:47.426998", "step": 6112, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:28:47.458338", "step": 6112, "epoch": 3 }, { "type": "loss", "content": 5.721451816498302e-05, "timestamp": "2025-09-10 02:28:47.460447", "step": 6113, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:28:47.491411", "step": 6113, "epoch": 3 }, { "type": "loss", "content": 0.000635522126685828, "timestamp": "2025-09-10 02:28:47.497979", "step": 6114, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:28:47.532982", "step": 6114, "epoch": 3 }, { "type": "loss", "content": 0.0002452080079820007, "timestamp": "2025-09-10 02:28:47.540095", "step": 6115, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:28:47.574851", "step": 6115, "epoch": 3 }, { "type": "loss", "content": 0.0011523573193699121, "timestamp": "2025-09-10 02:28:47.603452", "step": 6116, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:28:47.650342", "step": 6116, "epoch": 3 }, { "type": "loss", "content": 0.00015113291738089174, "timestamp": "2025-09-10 02:28:47.658203", "step": 6117, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:28:47.701167", "step": 6117, "epoch": 3 }, { "type": "loss", "content": 0.0004939243663102388, "timestamp": "2025-09-10 02:28:47.709590", "step": 6118, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 608 ], "flops": 18035204324480 }, "timestamp": "2025-09-10 02:28:47.775156", "step": 6118, "epoch": 3 }, { "type": "loss", "content": 0.02475116029381752, "timestamp": "2025-09-10 02:28:47.796645", "step": 6119, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:28:47.827951", "step": 6119, "epoch": 3 }, { "type": "loss", "content": 3.918137008440681e-05, "timestamp": "2025-09-10 02:28:47.853222", "step": 6120, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:28:47.885164", "step": 6120, "epoch": 3 }, { "type": "loss", "content": 3.542963168001734e-05, "timestamp": "2025-09-10 02:28:47.887509", "step": 6121, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:28:47.917812", "step": 6121, "epoch": 3 }, { "type": "loss", "content": 0.046116914600133896, "timestamp": "2025-09-10 02:28:47.922199", "step": 6122, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:28:47.954445", "step": 6122, "epoch": 3 }, { "type": "loss", "content": 0.0024645677767693996, "timestamp": "2025-09-10 02:28:47.962071", "step": 6123, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:28:47.992822", "step": 6123, "epoch": 3 }, { "type": "loss", "content": 0.0003803297586273402, "timestamp": "2025-09-10 02:28:48.018057", "step": 6124, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:28:48.056437", "step": 6124, "epoch": 3 }, { "type": "loss", "content": 0.00047259125858545303, "timestamp": "2025-09-10 02:28:48.061808", "step": 6125, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:28:48.093197", "step": 6125, "epoch": 3 }, { "type": "loss", "content": 0.00037615117616951466, "timestamp": "2025-09-10 02:28:48.097671", "step": 6126, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:28:48.129051", "step": 6126, "epoch": 3 }, { "type": "loss", "content": 0.0004323399916756898, "timestamp": "2025-09-10 02:28:48.135875", "step": 6127, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:28:48.172723", "step": 6127, "epoch": 3 }, { "type": "loss", "content": 0.0437922365963459, "timestamp": "2025-09-10 02:28:48.201023", "step": 6128, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:28:48.235490", "step": 6128, "epoch": 3 }, { "type": "loss", "content": 0.000819290173240006, "timestamp": "2025-09-10 02:28:48.240886", "step": 6129, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:28:48.280162", "step": 6129, "epoch": 3 }, { "type": "loss", "content": 0.00019732918008230627, "timestamp": "2025-09-10 02:28:48.292529", "step": 6130, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:28:48.323914", "step": 6130, "epoch": 3 }, { "type": "loss", "content": 0.0008840580121614039, "timestamp": "2025-09-10 02:28:48.331410", "step": 6131, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:28:48.361984", "step": 6131, "epoch": 3 }, { "type": "loss", "content": 0.019107328727841377, "timestamp": "2025-09-10 02:28:48.389830", "step": 6132, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:28:48.421969", "step": 6132, "epoch": 3 }, { "type": "loss", "content": 8.087460446404293e-05, "timestamp": "2025-09-10 02:28:48.430257", "step": 6133, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:28:48.462010", "step": 6133, "epoch": 3 }, { "type": "loss", "content": 0.008161481469869614, "timestamp": "2025-09-10 02:28:48.469397", "step": 6134, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:28:48.500501", "step": 6134, "epoch": 3 }, { "type": "loss", "content": 0.04065088555216789, "timestamp": "2025-09-10 02:28:48.508237", "step": 6135, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:28:48.539182", "step": 6135, "epoch": 3 }, { "type": "loss", "content": 0.001969260396435857, "timestamp": "2025-09-10 02:28:48.567465", "step": 6136, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:28:48.601560", "step": 6136, "epoch": 3 }, { "type": "loss", "content": 0.0010869913967326283, "timestamp": "2025-09-10 02:28:48.606358", "step": 6137, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:28:48.637725", "step": 6137, "epoch": 3 }, { "type": "loss", "content": 0.0005913428612984717, "timestamp": "2025-09-10 02:28:48.642348", "step": 6138, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:28:48.675095", "step": 6138, "epoch": 3 }, { "type": "loss", "content": 6.689595466013998e-05, "timestamp": "2025-09-10 02:28:48.682623", "step": 6139, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:28:48.714074", "step": 6139, "epoch": 3 }, { "type": "loss", "content": 0.005156568717211485, "timestamp": "2025-09-10 02:28:48.742463", "step": 6140, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:28:48.774908", "step": 6140, "epoch": 3 }, { "type": "loss", "content": 0.00014441793609876186, "timestamp": "2025-09-10 02:28:48.784687", "step": 6141, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:28:48.818569", "step": 6141, "epoch": 3 }, { "type": "loss", "content": 0.0004379312158562243, "timestamp": "2025-09-10 02:28:48.826274", "step": 6142, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:28:48.859648", "step": 6142, "epoch": 3 }, { "type": "loss", "content": 0.012073171325027943, "timestamp": "2025-09-10 02:28:48.866500", "step": 6143, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:28:48.904904", "step": 6143, "epoch": 3 }, { "type": "loss", "content": 0.0002663929190021008, "timestamp": "2025-09-10 02:28:48.932951", "step": 6144, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 656 ], "flops": 19459015502528 }, "timestamp": "2025-09-10 02:28:48.986342", "step": 6144, "epoch": 3 }, { "type": "loss", "content": 0.0007393588311970234, "timestamp": "2025-09-10 02:28:49.010022", "step": 6145, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:28:49.041968", "step": 6145, "epoch": 3 }, { "type": "loss", "content": 0.00043535669101402164, "timestamp": "2025-09-10 02:28:49.048697", "step": 6146, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:28:49.080271", "step": 6146, "epoch": 3 }, { "type": "loss", "content": 0.002159666968509555, "timestamp": "2025-09-10 02:28:49.087247", "step": 6147, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:28:49.117573", "step": 6147, "epoch": 3 }, { "type": "loss", "content": 0.001196373486891389, "timestamp": "2025-09-10 02:28:49.142819", "step": 6148, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:28:49.173591", "step": 6148, "epoch": 3 }, { "type": "loss", "content": 0.02430625446140766, "timestamp": "2025-09-10 02:28:49.182109", "step": 6149, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:28:49.213236", "step": 6149, "epoch": 3 }, { "type": "loss", "content": 0.001197171164676547, "timestamp": "2025-09-10 02:28:49.223428", "step": 6150, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:28:49.254356", "step": 6150, "epoch": 3 }, { "type": "loss", "content": 0.00013094481255393475, "timestamp": "2025-09-10 02:28:49.258537", "step": 6151, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 144 ], "flops": 4271696270016 }, "timestamp": "2025-09-10 02:28:49.288607", "step": 6151, "epoch": 3 }, { "type": "loss", "content": 0.0014958838000893593, "timestamp": "2025-09-10 02:28:49.312699", "step": 6152, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:28:49.344110", "step": 6152, "epoch": 3 }, { "type": "loss", "content": 0.00031426880741491914, "timestamp": "2025-09-10 02:28:49.348821", "step": 6153, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:28:49.379500", "step": 6153, "epoch": 3 }, { "type": "loss", "content": 0.0012687371345236897, "timestamp": "2025-09-10 02:28:49.389987", "step": 6154, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:28:49.421111", "step": 6154, "epoch": 3 }, { "type": "loss", "content": 0.00014976828242652118, "timestamp": "2025-09-10 02:28:49.432014", "step": 6155, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:28:49.464403", "step": 6155, "epoch": 3 }, { "type": "loss", "content": 0.0032478817738592625, "timestamp": "2025-09-10 02:28:49.492166", "step": 6156, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:28:49.523071", "step": 6156, "epoch": 3 }, { "type": "loss", "content": 0.0014056127984076738, "timestamp": "2025-09-10 02:28:49.528168", "step": 6157, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:28:49.559237", "step": 6157, "epoch": 3 }, { "type": "loss", "content": 0.0010582717368379235, "timestamp": "2025-09-10 02:28:49.567068", "step": 6158, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:28:49.599764", "step": 6158, "epoch": 3 }, { "type": "loss", "content": 0.00013609221787191927, "timestamp": "2025-09-10 02:28:49.606908", "step": 6159, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:28:49.639359", "step": 6159, "epoch": 3 }, { "type": "loss", "content": 0.0004430489207152277, "timestamp": "2025-09-10 02:28:49.667476", "step": 6160, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:28:49.699751", "step": 6160, "epoch": 3 }, { "type": "loss", "content": 0.001818513497710228, "timestamp": "2025-09-10 02:28:49.707627", "step": 6161, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-10 02:28:49.747955", "step": 6161, "epoch": 3 }, { "type": "loss", "content": 0.00037555742892436683, "timestamp": "2025-09-10 02:28:49.763907", "step": 6162, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:28:49.796138", "step": 6162, "epoch": 3 }, { "type": "loss", "content": 0.0007206489099189639, "timestamp": "2025-09-10 02:28:49.808649", "step": 6163, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:28:49.839676", "step": 6163, "epoch": 3 }, { "type": "loss", "content": 0.0002763103402685374, "timestamp": "2025-09-10 02:28:49.870777", "step": 6164, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:28:49.902544", "step": 6164, "epoch": 3 }, { "type": "loss", "content": 0.0009474234539084136, "timestamp": "2025-09-10 02:28:49.907330", "step": 6165, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:28:49.939488", "step": 6165, "epoch": 3 }, { "type": "loss", "content": 0.0009898262796923518, "timestamp": "2025-09-10 02:28:49.949704", "step": 6166, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:28:49.980929", "step": 6166, "epoch": 3 }, { "type": "loss", "content": 0.000859494844917208, "timestamp": "2025-09-10 02:28:49.988378", "step": 6167, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:28:50.020605", "step": 6167, "epoch": 3 }, { "type": "loss", "content": 0.0009193348814733326, "timestamp": "2025-09-10 02:28:50.053933", "step": 6168, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:28:50.093804", "step": 6168, "epoch": 3 }, { "type": "loss", "content": 0.004276874475181103, "timestamp": "2025-09-10 02:28:50.102501", "step": 6169, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:28:50.136920", "step": 6169, "epoch": 3 }, { "type": "loss", "content": 0.00454701716080308, "timestamp": "2025-09-10 02:28:50.144309", "step": 6170, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:28:50.175129", "step": 6170, "epoch": 3 }, { "type": "loss", "content": 0.0004608361341524869, "timestamp": "2025-09-10 02:28:50.186036", "step": 6171, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:28:50.220449", "step": 6171, "epoch": 3 }, { "type": "loss", "content": 0.000643234234303236, "timestamp": "2025-09-10 02:28:50.248738", "step": 6172, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:28:50.281343", "step": 6172, "epoch": 3 }, { "type": "loss", "content": 0.001443680957891047, "timestamp": "2025-09-10 02:28:50.285965", "step": 6173, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:28:50.318501", "step": 6173, "epoch": 3 }, { "type": "loss", "content": 0.007593140471726656, "timestamp": "2025-09-10 02:28:50.322577", "step": 6174, "epoch": 3 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 736 ], "batch_size": 8, "flops": 14554433988352 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 480 ], "batch_size": 8, "flops": 9492022189824 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 544 ], "batch_size": 8, "flops": 10757625139456 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 624 ], "batch_size": 8, "flops": 12339628826496 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 576 ], "batch_size": 8, "flops": 11390426614272 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 432 ], "batch_size": 8, "flops": 8542819977600 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 432 ], "batch_size": 8, "flops": 8542819977600 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 128 ], "batch_size": 8, "flops": 2531205966848 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 544 ], "batch_size": 8, "flops": 10757625139456 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 656 ], "batch_size": 8, "flops": 12972430301312 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 128 ], "batch_size": 8, "flops": 2531205966848 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 464 ], "batch_size": 8, "flops": 9175621452416 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 560 ], "batch_size": 8, "flops": 11074025876864 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 576 ], "batch_size": 8, "flops": 11390426614272 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 1168 ], "batch_size": 8, "flops": 23097253898368 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 640 ], "batch_size": 8, "flops": 12656029563904 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 6, 208 ], "batch_size": 8, "flops": 4113209653888 } ], "timestamp": "2025-09-10 02:29:00.483350", "step": 6174, "epoch": 3 }, { "type": "pplx", "content": 23762090.47420289, "timestamp": "2025-09-10 02:29:00.489527", "step": 6174, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:29:00.522250", "step": 6174, "epoch": 3 }, { "type": "loss", "content": 0.0009576130541972816, "timestamp": "2025-09-10 02:29:00.528475", "step": 6175, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:29:00.565354", "step": 6175, "epoch": 3 }, { "type": "loss", "content": 0.006735025439411402, "timestamp": "2025-09-10 02:29:00.593441", "step": 6176, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:29:00.627618", "step": 6176, "epoch": 3 }, { "type": "loss", "content": 0.005353983025997877, "timestamp": "2025-09-10 02:29:00.635299", "step": 6177, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:29:00.669053", "step": 6177, "epoch": 3 }, { "type": "loss", "content": 0.004146324936300516, "timestamp": "2025-09-10 02:29:00.673169", "step": 6178, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:29:00.709091", "step": 6178, "epoch": 3 }, { "type": "loss", "content": 0.0026420990470796824, "timestamp": "2025-09-10 02:29:00.714523", "step": 6179, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 480 ], "flops": 14238374516352 }, "timestamp": "2025-09-10 02:29:00.758352", "step": 6179, "epoch": 3 }, { "type": "loss", "content": 0.005170282907783985, "timestamp": "2025-09-10 02:29:00.796546", "step": 6180, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-10 02:29:00.836181", "step": 6180, "epoch": 3 }, { "type": "loss", "content": 0.003013778477907181, "timestamp": "2025-09-10 02:29:00.851358", "step": 6181, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:29:00.886755", "step": 6181, "epoch": 3 }, { "type": "loss", "content": 0.001864836667664349, "timestamp": "2025-09-10 02:29:00.897162", "step": 6182, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:29:00.935035", "step": 6182, "epoch": 3 }, { "type": "loss", "content": 0.0007107080891728401, "timestamp": "2025-09-10 02:29:00.946950", "step": 6183, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 464 ], "flops": 13763770790336 }, "timestamp": "2025-09-10 02:29:00.991051", "step": 6183, "epoch": 3 }, { "type": "loss", "content": 0.0008580170688219368, "timestamp": "2025-09-10 02:29:01.028951", "step": 6184, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:29:01.064026", "step": 6184, "epoch": 3 }, { "type": "loss", "content": 0.007555230520665646, "timestamp": "2025-09-10 02:29:01.068065", "step": 6185, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 448 ], "flops": 13289167064320 }, "timestamp": "2025-09-10 02:29:01.111953", "step": 6185, "epoch": 3 }, { "type": "loss", "content": 0.0021088081412017345, "timestamp": "2025-09-10 02:29:01.128296", "step": 6186, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:29:01.162621", "step": 6186, "epoch": 3 }, { "type": "loss", "content": 0.0008245863718912005, "timestamp": "2025-09-10 02:29:01.169216", "step": 6187, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:29:01.202455", "step": 6187, "epoch": 3 }, { "type": "loss", "content": 0.007418735884130001, "timestamp": "2025-09-10 02:29:01.233410", "step": 6188, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:29:01.266922", "step": 6188, "epoch": 3 }, { "type": "loss", "content": 0.00027580276946537197, "timestamp": "2025-09-10 02:29:01.275668", "step": 6189, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:29:01.309893", "step": 6189, "epoch": 3 }, { "type": "loss", "content": 0.0014374948805198073, "timestamp": "2025-09-10 02:29:01.320644", "step": 6190, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:29:01.353527", "step": 6190, "epoch": 3 }, { "type": "loss", "content": 0.0003637855697888881, "timestamp": "2025-09-10 02:29:01.357634", "step": 6191, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:29:01.390955", "step": 6191, "epoch": 3 }, { "type": "loss", "content": 0.003992599435150623, "timestamp": "2025-09-10 02:29:01.423392", "step": 6192, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:29:01.457871", "step": 6192, "epoch": 3 }, { "type": "loss", "content": 4.350113886175677e-05, "timestamp": "2025-09-10 02:29:01.461906", "step": 6193, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:29:01.496563", "step": 6193, "epoch": 3 }, { "type": "loss", "content": 0.0022764094173908234, "timestamp": "2025-09-10 02:29:01.507981", "step": 6194, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:29:01.541893", "step": 6194, "epoch": 3 }, { "type": "loss", "content": 0.012956062331795692, "timestamp": "2025-09-10 02:29:01.551989", "step": 6195, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:29:01.586391", "step": 6195, "epoch": 3 }, { "type": "loss", "content": 0.000641929917037487, "timestamp": "2025-09-10 02:29:01.617664", "step": 6196, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:29:01.651653", "step": 6196, "epoch": 3 }, { "type": "loss", "content": 0.0020424830727279186, "timestamp": "2025-09-10 02:29:01.661198", "step": 6197, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:29:01.693355", "step": 6197, "epoch": 3 }, { "type": "loss", "content": 0.0005121281137689948, "timestamp": "2025-09-10 02:29:01.699969", "step": 6198, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:29:01.732326", "step": 6198, "epoch": 3 }, { "type": "loss", "content": 0.0009792317869141698, "timestamp": "2025-09-10 02:29:01.736720", "step": 6199, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:29:01.770056", "step": 6199, "epoch": 3 }, { "type": "loss", "content": 0.0007432979182340205, "timestamp": "2025-09-10 02:29:01.795525", "step": 6200, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:29:01.830760", "step": 6200, "epoch": 3 }, { "type": "loss", "content": 0.0006474620313383639, "timestamp": "2025-09-10 02:29:01.835262", "step": 6201, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:29:01.868661", "step": 6201, "epoch": 3 }, { "type": "loss", "content": 0.0018694715108722448, "timestamp": "2025-09-10 02:29:01.873579", "step": 6202, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:29:01.904865", "step": 6202, "epoch": 3 }, { "type": "loss", "content": 0.001604403369128704, "timestamp": "2025-09-10 02:29:01.909029", "step": 6203, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-10 02:29:01.954060", "step": 6203, "epoch": 3 }, { "type": "loss", "content": 3.979109533247538e-05, "timestamp": "2025-09-10 02:29:01.988965", "step": 6204, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 144 ], "flops": 4271696270016 }, "timestamp": "2025-09-10 02:29:02.020146", "step": 6204, "epoch": 3 }, { "type": "loss", "content": 0.006631503812968731, "timestamp": "2025-09-10 02:29:02.023430", "step": 6205, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:29:02.057417", "step": 6205, "epoch": 3 }, { "type": "loss", "content": 0.003117796266451478, "timestamp": "2025-09-10 02:29:02.064614", "step": 6206, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:29:02.098793", "step": 6206, "epoch": 3 }, { "type": "loss", "content": 0.004380426835268736, "timestamp": "2025-09-10 02:29:02.108753", "step": 6207, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:29:02.141675", "step": 6207, "epoch": 3 }, { "type": "loss", "content": 0.016728242859244347, "timestamp": "2025-09-10 02:29:02.170150", "step": 6208, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:29:02.199959", "step": 6208, "epoch": 3 }, { "type": "loss", "content": 0.0002014095662161708, "timestamp": "2025-09-10 02:29:02.202448", "step": 6209, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:29:02.238990", "step": 6209, "epoch": 3 }, { "type": "loss", "content": 0.0007832984556443989, "timestamp": "2025-09-10 02:29:02.248922", "step": 6210, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:29:02.283798", "step": 6210, "epoch": 3 }, { "type": "loss", "content": 0.0007357418653555214, "timestamp": "2025-09-10 02:29:02.294566", "step": 6211, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:29:02.329738", "step": 6211, "epoch": 3 }, { "type": "loss", "content": 0.02407163567841053, "timestamp": "2025-09-10 02:29:02.369183", "step": 6212, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:29:02.402828", "step": 6212, "epoch": 3 }, { "type": "loss", "content": 8.155436808010563e-05, "timestamp": "2025-09-10 02:29:02.407820", "step": 6213, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:29:02.448483", "step": 6213, "epoch": 3 }, { "type": "loss", "content": 0.0003762414853554219, "timestamp": "2025-09-10 02:29:02.453924", "step": 6214, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:29:02.492975", "step": 6214, "epoch": 3 }, { "type": "loss", "content": 0.001163300359621644, "timestamp": "2025-09-10 02:29:02.500308", "step": 6215, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:29:02.540491", "step": 6215, "epoch": 3 }, { "type": "loss", "content": 0.011203320696949959, "timestamp": "2025-09-10 02:29:02.571357", "step": 6216, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:29:02.606934", "step": 6216, "epoch": 3 }, { "type": "loss", "content": 0.0006556047010235488, "timestamp": "2025-09-10 02:29:02.611843", "step": 6217, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 480 ], "flops": 14238374516352 }, "timestamp": "2025-09-10 02:29:02.656645", "step": 6217, "epoch": 3 }, { "type": "loss", "content": 0.0007842601626180112, "timestamp": "2025-09-10 02:29:02.674001", "step": 6218, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:29:02.707892", "step": 6218, "epoch": 3 }, { "type": "loss", "content": 0.0004893920267932117, "timestamp": "2025-09-10 02:29:02.714573", "step": 6219, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:29:02.750107", "step": 6219, "epoch": 3 }, { "type": "loss", "content": 0.0008870299207046628, "timestamp": "2025-09-10 02:29:02.778290", "step": 6220, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:29:02.810971", "step": 6220, "epoch": 3 }, { "type": "loss", "content": 0.0022531996946781874, "timestamp": "2025-09-10 02:29:02.815289", "step": 6221, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 432 ], "flops": 12814563338304 }, "timestamp": "2025-09-10 02:29:02.858836", "step": 6221, "epoch": 3 }, { "type": "loss", "content": 0.0007312820525839925, "timestamp": "2025-09-10 02:29:02.874928", "step": 6222, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:29:02.912150", "step": 6222, "epoch": 3 }, { "type": "loss", "content": 0.0002016266662394628, "timestamp": "2025-09-10 02:29:02.922542", "step": 6223, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:29:02.958404", "step": 6223, "epoch": 3 }, { "type": "loss", "content": 0.0006459229625761509, "timestamp": "2025-09-10 02:29:02.986941", "step": 6224, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:29:03.019933", "step": 6224, "epoch": 3 }, { "type": "loss", "content": 0.0004273521772120148, "timestamp": "2025-09-10 02:29:03.024404", "step": 6225, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:29:03.059004", "step": 6225, "epoch": 3 }, { "type": "loss", "content": 0.00010626261064317077, "timestamp": "2025-09-10 02:29:03.065725", "step": 6226, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:29:03.101442", "step": 6226, "epoch": 3 }, { "type": "loss", "content": 0.0011228015646338463, "timestamp": "2025-09-10 02:29:03.108686", "step": 6227, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:29:03.147917", "step": 6227, "epoch": 3 }, { "type": "loss", "content": 0.0005814795149490237, "timestamp": "2025-09-10 02:29:03.176118", "step": 6228, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:29:03.212972", "step": 6228, "epoch": 3 }, { "type": "loss", "content": 1.321633408224443e-05, "timestamp": "2025-09-10 02:29:03.218168", "step": 6229, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:29:03.262762", "step": 6229, "epoch": 3 }, { "type": "loss", "content": 0.002059446182101965, "timestamp": "2025-09-10 02:29:03.274682", "step": 6230, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:29:03.307936", "step": 6230, "epoch": 3 }, { "type": "loss", "content": 0.0014323127688840032, "timestamp": "2025-09-10 02:29:03.315020", "step": 6231, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:29:03.347935", "step": 6231, "epoch": 3 }, { "type": "loss", "content": 0.0002452459593769163, "timestamp": "2025-09-10 02:29:03.378692", "step": 6232, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:29:03.413946", "step": 6232, "epoch": 3 }, { "type": "loss", "content": 0.01287010032683611, "timestamp": "2025-09-10 02:29:03.423501", "step": 6233, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:29:03.463502", "step": 6233, "epoch": 3 }, { "type": "loss", "content": 3.2527696021134034e-05, "timestamp": "2025-09-10 02:29:03.470318", "step": 6234, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:29:03.508243", "step": 6234, "epoch": 3 }, { "type": "loss", "content": 0.00437973951920867, "timestamp": "2025-09-10 02:29:03.515668", "step": 6235, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:29:03.552354", "step": 6235, "epoch": 3 }, { "type": "loss", "content": 0.010270438157022, "timestamp": "2025-09-10 02:29:03.578323", "step": 6236, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:29:03.612892", "step": 6236, "epoch": 3 }, { "type": "loss", "content": 0.00011186213669134304, "timestamp": "2025-09-10 02:29:03.619445", "step": 6237, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:29:03.654461", "step": 6237, "epoch": 3 }, { "type": "loss", "content": 0.00016674351354595274, "timestamp": "2025-09-10 02:29:03.661753", "step": 6238, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:29:03.706916", "step": 6238, "epoch": 3 }, { "type": "loss", "content": 0.0005096830427646637, "timestamp": "2025-09-10 02:29:03.714335", "step": 6239, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 144 ], "flops": 4271696270016 }, "timestamp": "2025-09-10 02:29:03.750293", "step": 6239, "epoch": 3 }, { "type": "loss", "content": 8.230676030507311e-05, "timestamp": "2025-09-10 02:29:03.777459", "step": 6240, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-10 02:29:03.816710", "step": 6240, "epoch": 3 }, { "type": "loss", "content": 0.00013677349488716573, "timestamp": "2025-09-10 02:29:03.832166", "step": 6241, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:29:03.868850", "step": 6241, "epoch": 3 }, { "type": "loss", "content": 2.3667438654229045e-05, "timestamp": "2025-09-10 02:29:03.880696", "step": 6242, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:29:03.915934", "step": 6242, "epoch": 3 }, { "type": "loss", "content": 0.00017127035243902355, "timestamp": "2025-09-10 02:29:03.922666", "step": 6243, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:29:03.957512", "step": 6243, "epoch": 3 }, { "type": "loss", "content": 0.0002690852852538228, "timestamp": "2025-09-10 02:29:03.985803", "step": 6244, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:29:04.022765", "step": 6244, "epoch": 3 }, { "type": "loss", "content": 0.00017910164024215192, "timestamp": "2025-09-10 02:29:04.030531", "step": 6245, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:29:04.066882", "step": 6245, "epoch": 3 }, { "type": "loss", "content": 0.0005587777122855186, "timestamp": "2025-09-10 02:29:04.074089", "step": 6246, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 480 ], "flops": 14238374516352 }, "timestamp": "2025-09-10 02:29:04.118109", "step": 6246, "epoch": 3 }, { "type": "loss", "content": 0.0008401383529417217, "timestamp": "2025-09-10 02:29:04.135472", "step": 6247, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:29:04.170553", "step": 6247, "epoch": 3 }, { "type": "loss", "content": 0.0020682509057223797, "timestamp": "2025-09-10 02:29:04.201548", "step": 6248, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:29:04.236004", "step": 6248, "epoch": 3 }, { "type": "loss", "content": 0.0007200734107755125, "timestamp": "2025-09-10 02:29:04.239906", "step": 6249, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:29:04.272787", "step": 6249, "epoch": 3 }, { "type": "loss", "content": 0.00019148035789839923, "timestamp": "2025-09-10 02:29:04.279772", "step": 6250, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:29:04.315061", "step": 6250, "epoch": 3 }, { "type": "loss", "content": 4.394280040287413e-05, "timestamp": "2025-09-10 02:29:04.317814", "step": 6251, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:29:04.351509", "step": 6251, "epoch": 3 }, { "type": "loss", "content": 0.0002166307531297207, "timestamp": "2025-09-10 02:29:04.377873", "step": 6252, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:29:04.421383", "step": 6252, "epoch": 3 }, { "type": "loss", "content": 0.0003251233429182321, "timestamp": "2025-09-10 02:29:04.428899", "step": 6253, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:29:04.467402", "step": 6253, "epoch": 3 }, { "type": "loss", "content": 0.00010890467092394829, "timestamp": "2025-09-10 02:29:04.474674", "step": 6254, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:29:04.509098", "step": 6254, "epoch": 3 }, { "type": "loss", "content": 0.0005082925199531019, "timestamp": "2025-09-10 02:29:04.511914", "step": 6255, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:29:04.545351", "step": 6255, "epoch": 3 }, { "type": "loss", "content": 0.0006019308930262923, "timestamp": "2025-09-10 02:29:04.576810", "step": 6256, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:29:04.613745", "step": 6256, "epoch": 3 }, { "type": "loss", "content": 0.00034762476570904255, "timestamp": "2025-09-10 02:29:04.617911", "step": 6257, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:29:04.650328", "step": 6257, "epoch": 3 }, { "type": "loss", "content": 0.00011139985144836828, "timestamp": "2025-09-10 02:29:04.660196", "step": 6258, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:29:04.698657", "step": 6258, "epoch": 3 }, { "type": "loss", "content": 0.00037154555320739746, "timestamp": "2025-09-10 02:29:04.712334", "step": 6259, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:29:04.746352", "step": 6259, "epoch": 3 }, { "type": "loss", "content": 0.00046982159256003797, "timestamp": "2025-09-10 02:29:04.773897", "step": 6260, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:29:04.804817", "step": 6260, "epoch": 3 }, { "type": "loss", "content": 0.00027465628227218986, "timestamp": "2025-09-10 02:29:04.807534", "step": 6261, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:29:04.838605", "step": 6261, "epoch": 3 }, { "type": "loss", "content": 0.0002272390847792849, "timestamp": "2025-09-10 02:29:04.848663", "step": 6262, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:29:04.880669", "step": 6262, "epoch": 3 }, { "type": "loss", "content": 0.0003084797062911093, "timestamp": "2025-09-10 02:29:04.888316", "step": 6263, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:29:04.924069", "step": 6263, "epoch": 3 }, { "type": "loss", "content": 0.0007478706538677216, "timestamp": "2025-09-10 02:29:04.949180", "step": 6264, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:29:04.983852", "step": 6264, "epoch": 3 }, { "type": "loss", "content": 0.0004629208124242723, "timestamp": "2025-09-10 02:29:04.992116", "step": 6265, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:29:05.025256", "step": 6265, "epoch": 3 }, { "type": "loss", "content": 0.0001887015241663903, "timestamp": "2025-09-10 02:29:05.029368", "step": 6266, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:29:05.062885", "step": 6266, "epoch": 3 }, { "type": "loss", "content": 0.001650593476369977, "timestamp": "2025-09-10 02:29:05.069594", "step": 6267, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:29:05.102767", "step": 6267, "epoch": 3 }, { "type": "loss", "content": 0.0003856061666738242, "timestamp": "2025-09-10 02:29:05.130373", "step": 6268, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:29:05.164244", "step": 6268, "epoch": 3 }, { "type": "loss", "content": 0.0007830560207366943, "timestamp": "2025-09-10 02:29:05.166809", "step": 6269, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:29:05.203631", "step": 6269, "epoch": 3 }, { "type": "loss", "content": 0.0001599523238837719, "timestamp": "2025-09-10 02:29:05.216216", "step": 6270, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:29:05.249646", "step": 6270, "epoch": 3 }, { "type": "loss", "content": 0.0012749488232657313, "timestamp": "2025-09-10 02:29:05.257268", "step": 6271, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:29:05.292066", "step": 6271, "epoch": 3 }, { "type": "loss", "content": 0.0036235000006854534, "timestamp": "2025-09-10 02:29:05.321219", "step": 6272, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:29:05.365793", "step": 6272, "epoch": 3 }, { "type": "loss", "content": 0.0004903791705146432, "timestamp": "2025-09-10 02:29:05.369874", "step": 6273, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:29:05.402955", "step": 6273, "epoch": 3 }, { "type": "loss", "content": 7.18557057552971e-05, "timestamp": "2025-09-10 02:29:05.409791", "step": 6274, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:29:05.443844", "step": 6274, "epoch": 3 }, { "type": "loss", "content": 5.8820067351916805e-05, "timestamp": "2025-09-10 02:29:05.455625", "step": 6275, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:29:05.491242", "step": 6275, "epoch": 3 }, { "type": "loss", "content": 0.0001387560332659632, "timestamp": "2025-09-10 02:29:05.524569", "step": 6276, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:29:05.557513", "step": 6276, "epoch": 3 }, { "type": "loss", "content": 8.674280252307653e-05, "timestamp": "2025-09-10 02:29:05.561570", "step": 6277, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:29:05.597546", "step": 6277, "epoch": 3 }, { "type": "loss", "content": 0.0016429023817181587, "timestamp": "2025-09-10 02:29:05.602308", "step": 6278, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:29:05.635431", "step": 6278, "epoch": 3 }, { "type": "loss", "content": 0.0003711978788487613, "timestamp": "2025-09-10 02:29:05.642513", "step": 6279, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:29:05.678251", "step": 6279, "epoch": 3 }, { "type": "loss", "content": 0.0036703094374388456, "timestamp": "2025-09-10 02:29:05.706203", "step": 6280, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:29:05.739233", "step": 6280, "epoch": 3 }, { "type": "loss", "content": 0.0004256887186784297, "timestamp": "2025-09-10 02:29:05.747299", "step": 6281, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:29:05.780857", "step": 6281, "epoch": 3 }, { "type": "loss", "content": 0.00036882911808788776, "timestamp": "2025-09-10 02:29:05.788219", "step": 6282, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:29:05.826181", "step": 6282, "epoch": 3 }, { "type": "loss", "content": 0.0006809058249928057, "timestamp": "2025-09-10 02:29:05.839544", "step": 6283, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:29:05.874700", "step": 6283, "epoch": 3 }, { "type": "loss", "content": 0.0020839818753302097, "timestamp": "2025-09-10 02:29:05.899681", "step": 6284, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:29:05.934688", "step": 6284, "epoch": 3 }, { "type": "loss", "content": 0.00040493832784704864, "timestamp": "2025-09-10 02:29:05.940112", "step": 6285, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:29:05.978214", "step": 6285, "epoch": 3 }, { "type": "loss", "content": 8.699164027348161e-05, "timestamp": "2025-09-10 02:29:05.991891", "step": 6286, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:29:06.026523", "step": 6286, "epoch": 3 }, { "type": "loss", "content": 0.00018825959705282003, "timestamp": "2025-09-10 02:29:06.038487", "step": 6287, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:29:06.072604", "step": 6287, "epoch": 3 }, { "type": "loss", "content": 0.0004126779385842383, "timestamp": "2025-09-10 02:29:06.097564", "step": 6288, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:29:06.128927", "step": 6288, "epoch": 3 }, { "type": "loss", "content": 0.02210475504398346, "timestamp": "2025-09-10 02:29:06.131446", "step": 6289, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:29:06.164947", "step": 6289, "epoch": 3 }, { "type": "loss", "content": 0.0003806811582762748, "timestamp": "2025-09-10 02:29:06.171748", "step": 6290, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:29:06.203970", "step": 6290, "epoch": 3 }, { "type": "loss", "content": 0.002564129186794162, "timestamp": "2025-09-10 02:29:06.208366", "step": 6291, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:29:06.240294", "step": 6291, "epoch": 3 }, { "type": "loss", "content": 0.0007361977477557957, "timestamp": "2025-09-10 02:29:06.268273", "step": 6292, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 144 ], "flops": 4271696270016 }, "timestamp": "2025-09-10 02:29:06.299459", "step": 6292, "epoch": 3 }, { "type": "loss", "content": 4.1402661736356094e-05, "timestamp": "2025-09-10 02:29:06.301852", "step": 6293, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:29:06.333635", "step": 6293, "epoch": 3 }, { "type": "loss", "content": 5.298778341966681e-05, "timestamp": "2025-09-10 02:29:06.340760", "step": 6294, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:29:06.373775", "step": 6294, "epoch": 3 }, { "type": "loss", "content": 0.04112233966588974, "timestamp": "2025-09-10 02:29:06.381249", "step": 6295, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:29:06.413683", "step": 6295, "epoch": 3 }, { "type": "loss", "content": 0.0010367208160459995, "timestamp": "2025-09-10 02:29:06.441519", "step": 6296, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:29:06.473067", "step": 6296, "epoch": 3 }, { "type": "loss", "content": 0.000295735226245597, "timestamp": "2025-09-10 02:29:06.482727", "step": 6297, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:29:06.515480", "step": 6297, "epoch": 3 }, { "type": "loss", "content": 0.00019989509019069374, "timestamp": "2025-09-10 02:29:06.522560", "step": 6298, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:29:06.553317", "step": 6298, "epoch": 3 }, { "type": "loss", "content": 8.843276737025008e-05, "timestamp": "2025-09-10 02:29:06.564281", "step": 6299, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:29:06.596144", "step": 6299, "epoch": 3 }, { "type": "loss", "content": 0.00021590027608908713, "timestamp": "2025-09-10 02:29:06.629593", "step": 6300, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:29:06.661426", "step": 6300, "epoch": 3 }, { "type": "loss", "content": 5.1329996495041996e-05, "timestamp": "2025-09-10 02:29:06.665540", "step": 6301, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:29:06.698119", "step": 6301, "epoch": 3 }, { "type": "loss", "content": 0.0038885578978806734, "timestamp": "2025-09-10 02:29:06.707892", "step": 6302, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:29:06.740805", "step": 6302, "epoch": 3 }, { "type": "loss", "content": 0.00019760747090913355, "timestamp": "2025-09-10 02:29:06.747974", "step": 6303, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:29:06.780375", "step": 6303, "epoch": 3 }, { "type": "loss", "content": 1.984067785087973e-05, "timestamp": "2025-09-10 02:29:06.811653", "step": 6304, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:29:06.843758", "step": 6304, "epoch": 3 }, { "type": "loss", "content": 0.0008169592474587262, "timestamp": "2025-09-10 02:29:06.851168", "step": 6305, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:29:06.882525", "step": 6305, "epoch": 3 }, { "type": "loss", "content": 0.0003005763574037701, "timestamp": "2025-09-10 02:29:06.890220", "step": 6306, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 128 ], "flops": 3797092544000 }, "timestamp": "2025-09-10 02:29:06.921053", "step": 6306, "epoch": 3 }, { "type": "loss", "content": 9.076990681933239e-05, "timestamp": "2025-09-10 02:29:06.923924", "step": 6307, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:29:06.955910", "step": 6307, "epoch": 3 }, { "type": "loss", "content": 5.251143375062384e-05, "timestamp": "2025-09-10 02:29:06.988551", "step": 6308, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:29:07.020723", "step": 6308, "epoch": 3 }, { "type": "loss", "content": 8.822443487588316e-05, "timestamp": "2025-09-10 02:29:07.025564", "step": 6309, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:29:07.056508", "step": 6309, "epoch": 3 }, { "type": "loss", "content": 0.00012206012615934014, "timestamp": "2025-09-10 02:29:07.059078", "step": 6310, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:29:07.092205", "step": 6310, "epoch": 3 }, { "type": "loss", "content": 6.703573308186606e-05, "timestamp": "2025-09-10 02:29:07.102240", "step": 6311, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:29:07.134448", "step": 6311, "epoch": 3 }, { "type": "loss", "content": 7.877570897107944e-05, "timestamp": "2025-09-10 02:29:07.165292", "step": 6312, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:29:07.197350", "step": 6312, "epoch": 3 }, { "type": "loss", "content": 0.00010713493247749284, "timestamp": "2025-09-10 02:29:07.202384", "step": 6313, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:29:07.235524", "step": 6313, "epoch": 3 }, { "type": "loss", "content": 3.199342609150335e-05, "timestamp": "2025-09-10 02:29:07.248852", "step": 6314, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:29:07.280036", "step": 6314, "epoch": 3 }, { "type": "loss", "content": 0.02364686317741871, "timestamp": "2025-09-10 02:29:07.283883", "step": 6315, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:29:07.315780", "step": 6315, "epoch": 3 }, { "type": "loss", "content": 0.0002355035103391856, "timestamp": "2025-09-10 02:29:07.348371", "step": 6316, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:29:07.383001", "step": 6316, "epoch": 3 }, { "type": "loss", "content": 0.00014525903679896146, "timestamp": "2025-09-10 02:29:07.388034", "step": 6317, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:29:07.419491", "step": 6317, "epoch": 3 }, { "type": "loss", "content": 0.00019105462706647813, "timestamp": "2025-09-10 02:29:07.431764", "step": 6318, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:29:07.463177", "step": 6318, "epoch": 3 }, { "type": "loss", "content": 0.00010136031778529286, "timestamp": "2025-09-10 02:29:07.469800", "step": 6319, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:29:07.500713", "step": 6319, "epoch": 3 }, { "type": "loss", "content": 0.00030389634775929153, "timestamp": "2025-09-10 02:29:07.528449", "step": 6320, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:29:07.559425", "step": 6320, "epoch": 3 }, { "type": "loss", "content": 0.0005676125292666256, "timestamp": "2025-09-10 02:29:07.564296", "step": 6321, "epoch": 3 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 736 ], "batch_size": 8, "flops": 14554433988352 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 480 ], "batch_size": 8, "flops": 9492022189824 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 544 ], "batch_size": 8, "flops": 10757625139456 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 624 ], "batch_size": 8, "flops": 12339628826496 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 576 ], "batch_size": 8, "flops": 11390426614272 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 432 ], "batch_size": 8, "flops": 8542819977600 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 432 ], "batch_size": 8, "flops": 8542819977600 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 128 ], "batch_size": 8, "flops": 2531205966848 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 544 ], "batch_size": 8, "flops": 10757625139456 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 656 ], "batch_size": 8, "flops": 12972430301312 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 128 ], "batch_size": 8, "flops": 2531205966848 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 464 ], "batch_size": 8, "flops": 9175621452416 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 560 ], "batch_size": 8, "flops": 11074025876864 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 576 ], "batch_size": 8, "flops": 11390426614272 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 1168 ], "batch_size": 8, "flops": 23097253898368 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 640 ], "batch_size": 8, "flops": 12656029563904 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 6, 208 ], "batch_size": 8, "flops": 4113209653888 } ], "timestamp": "2025-09-10 02:29:18.130853", "step": 6321, "epoch": 3 }, { "type": "pplx", "content": 22779605.39083871, "timestamp": "2025-09-10 02:29:18.134652", "step": 6321, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:29:18.167347", "step": 6321, "epoch": 3 }, { "type": "loss", "content": 0.000332222378347069, "timestamp": "2025-09-10 02:29:18.173098", "step": 6322, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:29:18.205647", "step": 6322, "epoch": 3 }, { "type": "loss", "content": 0.003474770812317729, "timestamp": "2025-09-10 02:29:18.209511", "step": 6323, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:29:18.240090", "step": 6323, "epoch": 3 }, { "type": "loss", "content": 0.0007376385619863868, "timestamp": "2025-09-10 02:29:18.267870", "step": 6324, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:29:18.306032", "step": 6324, "epoch": 3 }, { "type": "loss", "content": 0.021130096167325974, "timestamp": "2025-09-10 02:29:18.310959", "step": 6325, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:29:18.343599", "step": 6325, "epoch": 3 }, { "type": "loss", "content": 0.0008156650001183152, "timestamp": "2025-09-10 02:29:18.351079", "step": 6326, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:29:18.391999", "step": 6326, "epoch": 3 }, { "type": "loss", "content": 4.109603105462156e-05, "timestamp": "2025-09-10 02:29:18.405697", "step": 6327, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-10 02:29:18.445889", "step": 6327, "epoch": 3 }, { "type": "loss", "content": 0.0003445267793722451, "timestamp": "2025-09-10 02:29:18.482728", "step": 6328, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:29:18.517430", "step": 6328, "epoch": 3 }, { "type": "loss", "content": 0.00021596229635179043, "timestamp": "2025-09-10 02:29:18.522150", "step": 6329, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:29:18.554164", "step": 6329, "epoch": 3 }, { "type": "loss", "content": 0.0005771416472271085, "timestamp": "2025-09-10 02:29:18.564176", "step": 6330, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:29:18.602787", "step": 6330, "epoch": 3 }, { "type": "loss", "content": 0.0002720944758038968, "timestamp": "2025-09-10 02:29:18.609250", "step": 6331, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:29:18.640525", "step": 6331, "epoch": 3 }, { "type": "loss", "content": 0.0016264248406514525, "timestamp": "2025-09-10 02:29:18.669868", "step": 6332, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:29:18.701530", "step": 6332, "epoch": 3 }, { "type": "loss", "content": 0.00010872381972149014, "timestamp": "2025-09-10 02:29:18.711391", "step": 6333, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:29:18.743000", "step": 6333, "epoch": 3 }, { "type": "loss", "content": 0.00038386922096833587, "timestamp": "2025-09-10 02:29:18.749953", "step": 6334, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:29:18.780151", "step": 6334, "epoch": 3 }, { "type": "loss", "content": 0.0001077132037607953, "timestamp": "2025-09-10 02:29:18.787758", "step": 6335, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:29:18.819772", "step": 6335, "epoch": 3 }, { "type": "loss", "content": 0.0004933988093398511, "timestamp": "2025-09-10 02:29:18.853286", "step": 6336, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:29:18.885241", "step": 6336, "epoch": 3 }, { "type": "loss", "content": 0.0017707353690639138, "timestamp": "2025-09-10 02:29:18.890646", "step": 6337, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:29:18.921242", "step": 6337, "epoch": 3 }, { "type": "loss", "content": 0.00026189981144852936, "timestamp": "2025-09-10 02:29:18.929151", "step": 6338, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:29:18.960376", "step": 6338, "epoch": 3 }, { "type": "loss", "content": 0.07036115974187851, "timestamp": "2025-09-10 02:29:18.967898", "step": 6339, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:29:19.000684", "step": 6339, "epoch": 3 }, { "type": "loss", "content": 0.00025524134980514646, "timestamp": "2025-09-10 02:29:19.033580", "step": 6340, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:29:19.063853", "step": 6340, "epoch": 3 }, { "type": "loss", "content": 0.00033761485246941447, "timestamp": "2025-09-10 02:29:19.068626", "step": 6341, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:29:19.099684", "step": 6341, "epoch": 3 }, { "type": "loss", "content": 0.0010502010118216276, "timestamp": "2025-09-10 02:29:19.104150", "step": 6342, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:29:19.136087", "step": 6342, "epoch": 3 }, { "type": "loss", "content": 0.0003272096801083535, "timestamp": "2025-09-10 02:29:19.143049", "step": 6343, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:29:19.174394", "step": 6343, "epoch": 3 }, { "type": "loss", "content": 1.662676368141547e-05, "timestamp": "2025-09-10 02:29:19.205407", "step": 6344, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:29:19.235942", "step": 6344, "epoch": 3 }, { "type": "loss", "content": 0.00015760198584757745, "timestamp": "2025-09-10 02:29:19.238088", "step": 6345, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:29:19.269413", "step": 6345, "epoch": 3 }, { "type": "loss", "content": 0.0003509795351419598, "timestamp": "2025-09-10 02:29:19.279455", "step": 6346, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:29:19.309940", "step": 6346, "epoch": 3 }, { "type": "loss", "content": 0.002398541197180748, "timestamp": "2025-09-10 02:29:19.316540", "step": 6347, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:29:19.347416", "step": 6347, "epoch": 3 }, { "type": "loss", "content": 6.516561552416533e-05, "timestamp": "2025-09-10 02:29:19.378249", "step": 6348, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:29:19.408493", "step": 6348, "epoch": 3 }, { "type": "loss", "content": 7.48638849472627e-05, "timestamp": "2025-09-10 02:29:19.410610", "step": 6349, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:29:19.441360", "step": 6349, "epoch": 3 }, { "type": "loss", "content": 3.105393989244476e-05, "timestamp": "2025-09-10 02:29:19.445757", "step": 6350, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:29:19.477340", "step": 6350, "epoch": 3 }, { "type": "loss", "content": 0.00025512793217785656, "timestamp": "2025-09-10 02:29:19.484997", "step": 6351, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:29:19.515787", "step": 6351, "epoch": 3 }, { "type": "loss", "content": 0.0005597950075753033, "timestamp": "2025-09-10 02:29:19.540639", "step": 6352, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:29:19.571736", "step": 6352, "epoch": 3 }, { "type": "loss", "content": 0.0011579144047573209, "timestamp": "2025-09-10 02:29:19.575426", "step": 6353, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:29:19.607848", "step": 6353, "epoch": 3 }, { "type": "loss", "content": 8.454350609099492e-05, "timestamp": "2025-09-10 02:29:19.615373", "step": 6354, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:29:19.647231", "step": 6354, "epoch": 3 }, { "type": "loss", "content": 5.9956550103379413e-05, "timestamp": "2025-09-10 02:29:19.656938", "step": 6355, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:29:19.687539", "step": 6355, "epoch": 3 }, { "type": "loss", "content": 3.7725461879745126e-05, "timestamp": "2025-09-10 02:29:19.715332", "step": 6356, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:29:19.748381", "step": 6356, "epoch": 3 }, { "type": "loss", "content": 0.00045506874448619783, "timestamp": "2025-09-10 02:29:19.757975", "step": 6357, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:29:19.789077", "step": 6357, "epoch": 3 }, { "type": "loss", "content": 0.005231906659901142, "timestamp": "2025-09-10 02:29:19.795938", "step": 6358, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:29:19.827714", "step": 6358, "epoch": 3 }, { "type": "loss", "content": 0.0005539475823752582, "timestamp": "2025-09-10 02:29:19.834445", "step": 6359, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:29:19.865423", "step": 6359, "epoch": 3 }, { "type": "loss", "content": 0.00410681264474988, "timestamp": "2025-09-10 02:29:19.893284", "step": 6360, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:29:19.923869", "step": 6360, "epoch": 3 }, { "type": "loss", "content": 0.0008158148848451674, "timestamp": "2025-09-10 02:29:19.929029", "step": 6361, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:29:19.963810", "step": 6361, "epoch": 3 }, { "type": "loss", "content": 0.0011450514430180192, "timestamp": "2025-09-10 02:29:19.977531", "step": 6362, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:29:20.009029", "step": 6362, "epoch": 3 }, { "type": "loss", "content": 0.0005734324222430587, "timestamp": "2025-09-10 02:29:20.012932", "step": 6363, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:29:20.044053", "step": 6363, "epoch": 3 }, { "type": "loss", "content": 8.529059414286166e-05, "timestamp": "2025-09-10 02:29:20.072584", "step": 6364, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:29:20.105593", "step": 6364, "epoch": 3 }, { "type": "loss", "content": 0.022070255130529404, "timestamp": "2025-09-10 02:29:20.110631", "step": 6365, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:29:20.150185", "step": 6365, "epoch": 3 }, { "type": "loss", "content": 0.0003581468772608787, "timestamp": "2025-09-10 02:29:20.157700", "step": 6366, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-10 02:29:20.202522", "step": 6366, "epoch": 3 }, { "type": "loss", "content": 0.015283132903277874, "timestamp": "2025-09-10 02:29:20.218121", "step": 6367, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:29:20.249342", "step": 6367, "epoch": 3 }, { "type": "loss", "content": 0.00029461071244440973, "timestamp": "2025-09-10 02:29:20.277746", "step": 6368, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:29:20.310632", "step": 6368, "epoch": 3 }, { "type": "loss", "content": 0.010076651349663734, "timestamp": "2025-09-10 02:29:20.315613", "step": 6369, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:29:20.346636", "step": 6369, "epoch": 3 }, { "type": "loss", "content": 0.0035657952539622784, "timestamp": "2025-09-10 02:29:20.353572", "step": 6370, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-10 02:29:20.391678", "step": 6370, "epoch": 3 }, { "type": "loss", "content": 0.010319511406123638, "timestamp": "2025-09-10 02:29:20.407379", "step": 6371, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 464 ], "flops": 13763770790336 }, "timestamp": "2025-09-10 02:29:20.448636", "step": 6371, "epoch": 3 }, { "type": "loss", "content": 0.00019968757987953722, "timestamp": "2025-09-10 02:29:20.486664", "step": 6372, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:29:20.517082", "step": 6372, "epoch": 3 }, { "type": "loss", "content": 0.0001550498272990808, "timestamp": "2025-09-10 02:29:20.521556", "step": 6373, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:29:20.552236", "step": 6373, "epoch": 3 }, { "type": "loss", "content": 0.0003731503675226122, "timestamp": "2025-09-10 02:29:20.559068", "step": 6374, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:29:20.589976", "step": 6374, "epoch": 3 }, { "type": "loss", "content": 0.00011220359738217667, "timestamp": "2025-09-10 02:29:20.602529", "step": 6375, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:29:20.636903", "step": 6375, "epoch": 3 }, { "type": "loss", "content": 0.0002799866779241711, "timestamp": "2025-09-10 02:29:20.670381", "step": 6376, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 496 ], "flops": 14712978242368 }, "timestamp": "2025-09-10 02:29:20.709953", "step": 6376, "epoch": 3 }, { "type": "loss", "content": 0.003151228418573737, "timestamp": "2025-09-10 02:29:20.727181", "step": 6377, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:29:20.758329", "step": 6377, "epoch": 3 }, { "type": "loss", "content": 0.00033930037170648575, "timestamp": "2025-09-10 02:29:20.770815", "step": 6378, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:29:20.801897", "step": 6378, "epoch": 3 }, { "type": "loss", "content": 6.999308243393898e-05, "timestamp": "2025-09-10 02:29:20.808629", "step": 6379, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:29:20.839608", "step": 6379, "epoch": 3 }, { "type": "loss", "content": 0.00043307337909936905, "timestamp": "2025-09-10 02:29:20.864987", "step": 6380, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:29:20.895725", "step": 6380, "epoch": 3 }, { "type": "loss", "content": 0.00034244899870827794, "timestamp": "2025-09-10 02:29:20.898013", "step": 6381, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:29:20.933233", "step": 6381, "epoch": 3 }, { "type": "loss", "content": 0.0011802476365119219, "timestamp": "2025-09-10 02:29:20.945269", "step": 6382, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:29:20.982320", "step": 6382, "epoch": 3 }, { "type": "loss", "content": 0.0002502583956811577, "timestamp": "2025-09-10 02:29:20.993226", "step": 6383, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:29:21.030608", "step": 6383, "epoch": 3 }, { "type": "loss", "content": 0.0013230193872004747, "timestamp": "2025-09-10 02:29:21.058653", "step": 6384, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:29:21.093631", "step": 6384, "epoch": 3 }, { "type": "loss", "content": 0.0048297131434082985, "timestamp": "2025-09-10 02:29:21.096468", "step": 6385, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:29:21.129065", "step": 6385, "epoch": 3 }, { "type": "loss", "content": 1.7518630556878634e-05, "timestamp": "2025-09-10 02:29:21.134528", "step": 6386, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:29:21.166246", "step": 6386, "epoch": 3 }, { "type": "loss", "content": 0.0001769508671713993, "timestamp": "2025-09-10 02:29:21.173212", "step": 6387, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:29:21.205453", "step": 6387, "epoch": 3 }, { "type": "loss", "content": 7.868801185395569e-05, "timestamp": "2025-09-10 02:29:21.236430", "step": 6388, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:29:21.272399", "step": 6388, "epoch": 3 }, { "type": "loss", "content": 0.0004954601754434407, "timestamp": "2025-09-10 02:29:21.281350", "step": 6389, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:29:21.315513", "step": 6389, "epoch": 3 }, { "type": "loss", "content": 0.0008721998310647905, "timestamp": "2025-09-10 02:29:21.327525", "step": 6390, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:29:21.361599", "step": 6390, "epoch": 3 }, { "type": "loss", "content": 9.93388457573019e-05, "timestamp": "2025-09-10 02:29:21.368612", "step": 6391, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:29:21.400136", "step": 6391, "epoch": 3 }, { "type": "loss", "content": 6.254738400457427e-05, "timestamp": "2025-09-10 02:29:21.424203", "step": 6392, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:29:21.458021", "step": 6392, "epoch": 3 }, { "type": "loss", "content": 0.0006808959878981113, "timestamp": "2025-09-10 02:29:21.466527", "step": 6393, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 432 ], "flops": 12814563338304 }, "timestamp": "2025-09-10 02:29:21.508602", "step": 6393, "epoch": 3 }, { "type": "loss", "content": 0.00033295477624051273, "timestamp": "2025-09-10 02:29:21.524809", "step": 6394, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:29:21.557536", "step": 6394, "epoch": 3 }, { "type": "loss", "content": 0.0014583735028281808, "timestamp": "2025-09-10 02:29:21.565081", "step": 6395, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-10 02:29:21.603694", "step": 6395, "epoch": 3 }, { "type": "loss", "content": 0.000546331750229001, "timestamp": "2025-09-10 02:29:21.638539", "step": 6396, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:29:21.673034", "step": 6396, "epoch": 3 }, { "type": "loss", "content": 0.00026958558009937406, "timestamp": "2025-09-10 02:29:21.676535", "step": 6397, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-10 02:29:21.712740", "step": 6397, "epoch": 3 }, { "type": "loss", "content": 0.00034523935755714774, "timestamp": "2025-09-10 02:29:21.726607", "step": 6398, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:29:21.761790", "step": 6398, "epoch": 3 }, { "type": "loss", "content": 0.00016174910706467927, "timestamp": "2025-09-10 02:29:21.771121", "step": 6399, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:29:21.805907", "step": 6399, "epoch": 3 }, { "type": "loss", "content": 0.0008864761330187321, "timestamp": "2025-09-10 02:29:21.833966", "step": 6400, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:29:21.865231", "step": 6400, "epoch": 3 }, { "type": "loss", "content": 0.0001508051936980337, "timestamp": "2025-09-10 02:29:21.869796", "step": 6401, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:29:21.906240", "step": 6401, "epoch": 3 }, { "type": "loss", "content": 0.0057227760553359985, "timestamp": "2025-09-10 02:29:21.919569", "step": 6402, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:29:21.951281", "step": 6402, "epoch": 3 }, { "type": "loss", "content": 0.02214057371020317, "timestamp": "2025-09-10 02:29:21.963275", "step": 6403, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:29:22.004341", "step": 6403, "epoch": 3 }, { "type": "loss", "content": 0.0005699954344891012, "timestamp": "2025-09-10 02:29:22.035455", "step": 6404, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:29:22.066677", "step": 6404, "epoch": 3 }, { "type": "loss", "content": 0.000413557660067454, "timestamp": "2025-09-10 02:29:22.069007", "step": 6405, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:29:22.099892", "step": 6405, "epoch": 3 }, { "type": "loss", "content": 0.00027196184964850545, "timestamp": "2025-09-10 02:29:22.107338", "step": 6406, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:29:22.144101", "step": 6406, "epoch": 3 }, { "type": "loss", "content": 0.00011958154937019572, "timestamp": "2025-09-10 02:29:22.151018", "step": 6407, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:29:22.182768", "step": 6407, "epoch": 3 }, { "type": "loss", "content": 0.0002388076245551929, "timestamp": "2025-09-10 02:29:22.206859", "step": 6408, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:29:22.238367", "step": 6408, "epoch": 3 }, { "type": "loss", "content": 0.0002621088642627001, "timestamp": "2025-09-10 02:29:22.246894", "step": 6409, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:29:22.278944", "step": 6409, "epoch": 3 }, { "type": "loss", "content": 0.0003606698883231729, "timestamp": "2025-09-10 02:29:22.289087", "step": 6410, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:29:22.323509", "step": 6410, "epoch": 3 }, { "type": "loss", "content": 0.006846979726105928, "timestamp": "2025-09-10 02:29:22.335043", "step": 6411, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:29:22.369704", "step": 6411, "epoch": 3 }, { "type": "loss", "content": 0.0006495703128166497, "timestamp": "2025-09-10 02:29:22.398294", "step": 6412, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:29:22.431201", "step": 6412, "epoch": 3 }, { "type": "loss", "content": 0.001449008472263813, "timestamp": "2025-09-10 02:29:22.444220", "step": 6413, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-10 02:29:22.482765", "step": 6413, "epoch": 3 }, { "type": "loss", "content": 0.04917486757040024, "timestamp": "2025-09-10 02:29:22.498676", "step": 6414, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:29:22.529859", "step": 6414, "epoch": 3 }, { "type": "loss", "content": 0.00042798795038834214, "timestamp": "2025-09-10 02:29:22.537243", "step": 6415, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:29:22.568292", "step": 6415, "epoch": 3 }, { "type": "loss", "content": 0.00018693515448831022, "timestamp": "2025-09-10 02:29:22.593097", "step": 6416, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:29:22.625685", "step": 6416, "epoch": 3 }, { "type": "loss", "content": 0.00015946182247716933, "timestamp": "2025-09-10 02:29:22.638362", "step": 6417, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:29:22.669691", "step": 6417, "epoch": 3 }, { "type": "loss", "content": 3.3737265766831115e-05, "timestamp": "2025-09-10 02:29:22.679917", "step": 6418, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:29:22.712098", "step": 6418, "epoch": 3 }, { "type": "loss", "content": 8.317730680573732e-05, "timestamp": "2025-09-10 02:29:22.722150", "step": 6419, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:29:22.753156", "step": 6419, "epoch": 3 }, { "type": "loss", "content": 0.010926821269094944, "timestamp": "2025-09-10 02:29:22.781553", "step": 6420, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:29:22.813347", "step": 6420, "epoch": 3 }, { "type": "loss", "content": 0.00013701205898541957, "timestamp": "2025-09-10 02:29:22.817704", "step": 6421, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:29:22.850116", "step": 6421, "epoch": 3 }, { "type": "loss", "content": 0.0004380210011731833, "timestamp": "2025-09-10 02:29:22.862568", "step": 6422, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:29:22.893942", "step": 6422, "epoch": 3 }, { "type": "loss", "content": 0.00029581101262010634, "timestamp": "2025-09-10 02:29:22.901232", "step": 6423, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:29:22.932650", "step": 6423, "epoch": 3 }, { "type": "loss", "content": 0.00028576585464179516, "timestamp": "2025-09-10 02:29:22.963633", "step": 6424, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:29:22.994670", "step": 6424, "epoch": 3 }, { "type": "loss", "content": 0.0003905796620529145, "timestamp": "2025-09-10 02:29:22.999676", "step": 6425, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:29:23.030989", "step": 6425, "epoch": 3 }, { "type": "loss", "content": 0.0002705628867261112, "timestamp": "2025-09-10 02:29:23.038009", "step": 6426, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:29:23.072734", "step": 6426, "epoch": 3 }, { "type": "loss", "content": 0.02130054123699665, "timestamp": "2025-09-10 02:29:23.086423", "step": 6427, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:29:23.118615", "step": 6427, "epoch": 3 }, { "type": "loss", "content": 0.0003769658214878291, "timestamp": "2025-09-10 02:29:23.146259", "step": 6428, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:29:23.180368", "step": 6428, "epoch": 3 }, { "type": "loss", "content": 0.00011719001486198977, "timestamp": "2025-09-10 02:29:23.182825", "step": 6429, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:29:23.214238", "step": 6429, "epoch": 3 }, { "type": "loss", "content": 0.0029267126228660345, "timestamp": "2025-09-10 02:29:23.221867", "step": 6430, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:29:23.252492", "step": 6430, "epoch": 3 }, { "type": "loss", "content": 0.00013307588233146816, "timestamp": "2025-09-10 02:29:23.255084", "step": 6431, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 528 ], "flops": 15662185694400 }, "timestamp": "2025-09-10 02:29:23.300832", "step": 6431, "epoch": 3 }, { "type": "loss", "content": 0.0002167918864870444, "timestamp": "2025-09-10 02:29:23.340946", "step": 6432, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:29:23.380369", "step": 6432, "epoch": 3 }, { "type": "loss", "content": 0.00011096680100308731, "timestamp": "2025-09-10 02:29:23.384766", "step": 6433, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:29:23.424786", "step": 6433, "epoch": 3 }, { "type": "loss", "content": 6.910169031471014e-05, "timestamp": "2025-09-10 02:29:23.437350", "step": 6434, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 128 ], "flops": 3797092544000 }, "timestamp": "2025-09-10 02:29:23.470228", "step": 6434, "epoch": 3 }, { "type": "loss", "content": 0.004474697168916464, "timestamp": "2025-09-10 02:29:23.475217", "step": 6435, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:29:23.510567", "step": 6435, "epoch": 3 }, { "type": "loss", "content": 0.0006678365753032267, "timestamp": "2025-09-10 02:29:23.542157", "step": 6436, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:29:23.583432", "step": 6436, "epoch": 3 }, { "type": "loss", "content": 0.005918839015066624, "timestamp": "2025-09-10 02:29:23.586403", "step": 6437, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-10 02:29:23.626467", "step": 6437, "epoch": 3 }, { "type": "loss", "content": 0.0001482577354181558, "timestamp": "2025-09-10 02:29:23.640472", "step": 6438, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:29:23.677715", "step": 6438, "epoch": 3 }, { "type": "loss", "content": 0.00014806709077674896, "timestamp": "2025-09-10 02:29:23.687915", "step": 6439, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:29:23.725397", "step": 6439, "epoch": 3 }, { "type": "loss", "content": 0.0005189132643863559, "timestamp": "2025-09-10 02:29:23.754067", "step": 6440, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:29:23.787062", "step": 6440, "epoch": 3 }, { "type": "loss", "content": 0.00017340357590001076, "timestamp": "2025-09-10 02:29:23.800043", "step": 6441, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:29:23.831000", "step": 6441, "epoch": 3 }, { "type": "loss", "content": 0.030487949028611183, "timestamp": "2025-09-10 02:29:23.838852", "step": 6442, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:29:23.869448", "step": 6442, "epoch": 3 }, { "type": "loss", "content": 0.026207389310002327, "timestamp": "2025-09-10 02:29:23.876351", "step": 6443, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:29:23.907430", "step": 6443, "epoch": 3 }, { "type": "loss", "content": 0.00012488577340263873, "timestamp": "2025-09-10 02:29:23.931293", "step": 6444, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:29:23.961966", "step": 6444, "epoch": 3 }, { "type": "loss", "content": 0.00043719136738218367, "timestamp": "2025-09-10 02:29:23.964116", "step": 6445, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:29:24.004712", "step": 6445, "epoch": 3 }, { "type": "loss", "content": 0.0001817305019358173, "timestamp": "2025-09-10 02:29:24.011635", "step": 6446, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-10 02:29:24.052007", "step": 6446, "epoch": 3 }, { "type": "loss", "content": 0.00037571805296465755, "timestamp": "2025-09-10 02:29:24.067930", "step": 6447, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:29:24.099761", "step": 6447, "epoch": 3 }, { "type": "loss", "content": 0.00024322188983205706, "timestamp": "2025-09-10 02:29:24.125144", "step": 6448, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:29:24.157642", "step": 6448, "epoch": 3 }, { "type": "loss", "content": 0.0034805149771273136, "timestamp": "2025-09-10 02:29:24.163103", "step": 6449, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:29:24.193931", "step": 6449, "epoch": 3 }, { "type": "loss", "content": 8.158596756402403e-05, "timestamp": "2025-09-10 02:29:24.201350", "step": 6450, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:29:24.231856", "step": 6450, "epoch": 3 }, { "type": "loss", "content": 0.0003348083992023021, "timestamp": "2025-09-10 02:29:24.235749", "step": 6451, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:29:24.266402", "step": 6451, "epoch": 3 }, { "type": "loss", "content": 0.001363294431939721, "timestamp": "2025-09-10 02:29:24.294172", "step": 6452, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:29:24.325874", "step": 6452, "epoch": 3 }, { "type": "loss", "content": 0.0007565673440694809, "timestamp": "2025-09-10 02:29:24.338554", "step": 6453, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:29:24.370268", "step": 6453, "epoch": 3 }, { "type": "loss", "content": 3.2014875614549965e-05, "timestamp": "2025-09-10 02:29:24.378144", "step": 6454, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:29:24.408859", "step": 6454, "epoch": 3 }, { "type": "loss", "content": 8.36865438031964e-05, "timestamp": "2025-09-10 02:29:24.416537", "step": 6455, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:29:24.447271", "step": 6455, "epoch": 3 }, { "type": "loss", "content": 0.00034000244340859354, "timestamp": "2025-09-10 02:29:24.472591", "step": 6456, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 608 ], "flops": 18035204324480 }, "timestamp": "2025-09-10 02:29:24.522668", "step": 6456, "epoch": 3 }, { "type": "loss", "content": 0.0011615986004471779, "timestamp": "2025-09-10 02:29:24.544241", "step": 6457, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:29:24.576145", "step": 6457, "epoch": 3 }, { "type": "loss", "content": 0.0007336985436268151, "timestamp": "2025-09-10 02:29:24.578766", "step": 6458, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:29:24.610449", "step": 6458, "epoch": 3 }, { "type": "loss", "content": 2.4305074475705624e-05, "timestamp": "2025-09-10 02:29:24.622480", "step": 6459, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:29:24.655440", "step": 6459, "epoch": 3 }, { "type": "loss", "content": 0.0005083387950435281, "timestamp": "2025-09-10 02:29:24.680290", "step": 6460, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:29:24.711201", "step": 6460, "epoch": 3 }, { "type": "loss", "content": 0.0002868285810109228, "timestamp": "2025-09-10 02:29:24.713814", "step": 6461, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:29:24.744724", "step": 6461, "epoch": 3 }, { "type": "loss", "content": 6.002017835271545e-05, "timestamp": "2025-09-10 02:29:24.748333", "step": 6462, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:29:24.781362", "step": 6462, "epoch": 3 }, { "type": "loss", "content": 9.268764551961794e-05, "timestamp": "2025-09-10 02:29:24.783970", "step": 6463, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:29:24.814060", "step": 6463, "epoch": 3 }, { "type": "loss", "content": 0.0001622526760911569, "timestamp": "2025-09-10 02:29:24.837871", "step": 6464, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 448 ], "flops": 13289167064320 }, "timestamp": "2025-09-10 02:29:24.877032", "step": 6464, "epoch": 3 }, { "type": "loss", "content": 0.0037606866098940372, "timestamp": "2025-09-10 02:29:24.892897", "step": 6465, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:29:24.927792", "step": 6465, "epoch": 3 }, { "type": "loss", "content": 0.0015440176939591765, "timestamp": "2025-09-10 02:29:24.941588", "step": 6466, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:29:24.973250", "step": 6466, "epoch": 3 }, { "type": "loss", "content": 0.00043152968282811344, "timestamp": "2025-09-10 02:29:24.983279", "step": 6467, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:29:25.014538", "step": 6467, "epoch": 3 }, { "type": "loss", "content": 0.0006583757349289954, "timestamp": "2025-09-10 02:29:25.045133", "step": 6468, "epoch": 3 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 736 ], "batch_size": 8, "flops": 14554433988352 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 480 ], "batch_size": 8, "flops": 9492022189824 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 544 ], "batch_size": 8, "flops": 10757625139456 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 624 ], "batch_size": 8, "flops": 12339628826496 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 576 ], "batch_size": 8, "flops": 11390426614272 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 432 ], "batch_size": 8, "flops": 8542819977600 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 432 ], "batch_size": 8, "flops": 8542819977600 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 128 ], "batch_size": 8, "flops": 2531205966848 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 544 ], "batch_size": 8, "flops": 10757625139456 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 656 ], "batch_size": 8, "flops": 12972430301312 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 128 ], "batch_size": 8, "flops": 2531205966848 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 464 ], "batch_size": 8, "flops": 9175621452416 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 560 ], "batch_size": 8, "flops": 11074025876864 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 576 ], "batch_size": 8, "flops": 11390426614272 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 1168 ], "batch_size": 8, "flops": 23097253898368 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 640 ], "batch_size": 8, "flops": 12656029563904 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 6, 208 ], "batch_size": 8, "flops": 4113209653888 } ], "timestamp": "2025-09-10 02:29:35.349732", "step": 6468, "epoch": 3 }, { "type": "pplx", "content": 22255779.46558232, "timestamp": "2025-09-10 02:29:35.352759", "step": 6468, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:29:35.384222", "step": 6468, "epoch": 3 }, { "type": "loss", "content": 0.0009330078610219061, "timestamp": "2025-09-10 02:29:35.388293", "step": 6469, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:29:35.419665", "step": 6469, "epoch": 3 }, { "type": "loss", "content": 0.010204659774899483, "timestamp": "2025-09-10 02:29:35.426735", "step": 6470, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:29:35.459078", "step": 6470, "epoch": 3 }, { "type": "loss", "content": 0.00023531516490038484, "timestamp": "2025-09-10 02:29:35.465701", "step": 6471, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:29:35.503012", "step": 6471, "epoch": 3 }, { "type": "loss", "content": 0.0004967449931427836, "timestamp": "2025-09-10 02:29:35.528638", "step": 6472, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:29:35.568261", "step": 6472, "epoch": 3 }, { "type": "loss", "content": 0.0005900642718188465, "timestamp": "2025-09-10 02:29:35.572624", "step": 6473, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:29:35.612783", "step": 6473, "epoch": 3 }, { "type": "loss", "content": 0.0001301489828620106, "timestamp": "2025-09-10 02:29:35.623386", "step": 6474, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:29:35.658385", "step": 6474, "epoch": 3 }, { "type": "loss", "content": 0.0002881655527744442, "timestamp": "2025-09-10 02:29:35.663428", "step": 6475, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:29:35.701857", "step": 6475, "epoch": 3 }, { "type": "loss", "content": 0.00041221058927476406, "timestamp": "2025-09-10 02:29:35.734473", "step": 6476, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:29:35.780821", "step": 6476, "epoch": 3 }, { "type": "loss", "content": 0.0002252735139336437, "timestamp": "2025-09-10 02:29:35.785876", "step": 6477, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:29:35.820594", "step": 6477, "epoch": 3 }, { "type": "loss", "content": 0.00011001846723956987, "timestamp": "2025-09-10 02:29:35.830222", "step": 6478, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:29:35.862077", "step": 6478, "epoch": 3 }, { "type": "loss", "content": 0.0007892417488619685, "timestamp": "2025-09-10 02:29:35.865799", "step": 6479, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:29:35.898683", "step": 6479, "epoch": 3 }, { "type": "loss", "content": 0.009268686175346375, "timestamp": "2025-09-10 02:29:35.926101", "step": 6480, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:29:35.961547", "step": 6480, "epoch": 3 }, { "type": "loss", "content": 0.002452400978654623, "timestamp": "2025-09-10 02:29:35.969221", "step": 6481, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:29:36.000580", "step": 6481, "epoch": 3 }, { "type": "loss", "content": 0.00030889807385392487, "timestamp": "2025-09-10 02:29:36.005020", "step": 6482, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:29:36.045471", "step": 6482, "epoch": 3 }, { "type": "loss", "content": 0.00015111747779883444, "timestamp": "2025-09-10 02:29:36.052698", "step": 6483, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:29:36.091401", "step": 6483, "epoch": 3 }, { "type": "loss", "content": 0.000214752959436737, "timestamp": "2025-09-10 02:29:36.118935", "step": 6484, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:29:36.151773", "step": 6484, "epoch": 3 }, { "type": "loss", "content": 0.0003622962685767561, "timestamp": "2025-09-10 02:29:36.156467", "step": 6485, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:29:36.189541", "step": 6485, "epoch": 3 }, { "type": "loss", "content": 0.00042898583342321217, "timestamp": "2025-09-10 02:29:36.196999", "step": 6486, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:29:36.232058", "step": 6486, "epoch": 3 }, { "type": "loss", "content": 0.0003162114298902452, "timestamp": "2025-09-10 02:29:36.244003", "step": 6487, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:29:36.277950", "step": 6487, "epoch": 3 }, { "type": "loss", "content": 0.0003182740474585444, "timestamp": "2025-09-10 02:29:36.305280", "step": 6488, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:29:36.339842", "step": 6488, "epoch": 3 }, { "type": "loss", "content": 0.00012332449841778725, "timestamp": "2025-09-10 02:29:36.345057", "step": 6489, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:29:36.384063", "step": 6489, "epoch": 3 }, { "type": "loss", "content": 0.00016331372899003327, "timestamp": "2025-09-10 02:29:36.390704", "step": 6490, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:29:36.427948", "step": 6490, "epoch": 3 }, { "type": "loss", "content": 0.0025508024264127016, "timestamp": "2025-09-10 02:29:36.441375", "step": 6491, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:29:36.479851", "step": 6491, "epoch": 3 }, { "type": "loss", "content": 0.0007555651245638728, "timestamp": "2025-09-10 02:29:36.511641", "step": 6492, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:29:36.546508", "step": 6492, "epoch": 3 }, { "type": "loss", "content": 0.0002513106446713209, "timestamp": "2025-09-10 02:29:36.550495", "step": 6493, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:29:36.590373", "step": 6493, "epoch": 3 }, { "type": "loss", "content": 0.00017626323096919805, "timestamp": "2025-09-10 02:29:36.601330", "step": 6494, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-10 02:29:36.643268", "step": 6494, "epoch": 3 }, { "type": "loss", "content": 0.0001794335839804262, "timestamp": "2025-09-10 02:29:36.657290", "step": 6495, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:29:36.689244", "step": 6495, "epoch": 3 }, { "type": "loss", "content": 7.488606206607074e-05, "timestamp": "2025-09-10 02:29:36.720858", "step": 6496, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:29:36.751619", "step": 6496, "epoch": 3 }, { "type": "loss", "content": 0.0005462738336063921, "timestamp": "2025-09-10 02:29:36.753944", "step": 6497, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-10 02:29:36.792379", "step": 6497, "epoch": 3 }, { "type": "loss", "content": 0.00012696681369561702, "timestamp": "2025-09-10 02:29:36.807955", "step": 6498, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:29:36.841494", "step": 6498, "epoch": 3 }, { "type": "loss", "content": 0.0005157067789696157, "timestamp": "2025-09-10 02:29:36.852298", "step": 6499, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:29:36.883819", "step": 6499, "epoch": 3 }, { "type": "loss", "content": 0.0005326425889506936, "timestamp": "2025-09-10 02:29:36.907923", "step": 6500, "epoch": 3 }, { "type": "info", "content": "Checkpoint saved at step 6500", "timestamp": "2025-09-10 02:29:41.632309", "step": 6500, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:29:41.664361", "step": 6500, "epoch": 3 }, { "type": "loss", "content": 0.00022441011969931424, "timestamp": "2025-09-10 02:29:41.668364", "step": 6501, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 128 ], "flops": 3797092544000 }, "timestamp": "2025-09-10 02:29:41.699351", "step": 6501, "epoch": 3 }, { "type": "loss", "content": 0.0002523001458030194, "timestamp": "2025-09-10 02:29:41.701189", "step": 6502, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:29:41.732711", "step": 6502, "epoch": 3 }, { "type": "loss", "content": 3.853170346701518e-05, "timestamp": "2025-09-10 02:29:41.739497", "step": 6503, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:29:41.769971", "step": 6503, "epoch": 3 }, { "type": "loss", "content": 0.0001199183170683682, "timestamp": "2025-09-10 02:29:41.794729", "step": 6504, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-10 02:29:41.831803", "step": 6504, "epoch": 3 }, { "type": "loss", "content": 0.0005333385779522359, "timestamp": "2025-09-10 02:29:41.846885", "step": 6505, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:29:41.877696", "step": 6505, "epoch": 3 }, { "type": "loss", "content": 0.0003356721135787666, "timestamp": "2025-09-10 02:29:41.887991", "step": 6506, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-10 02:29:41.930725", "step": 6506, "epoch": 3 }, { "type": "loss", "content": 0.0004762968164868653, "timestamp": "2025-09-10 02:29:41.944737", "step": 6507, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:29:41.977144", "step": 6507, "epoch": 3 }, { "type": "loss", "content": 0.0037521845661103725, "timestamp": "2025-09-10 02:29:42.005092", "step": 6508, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:29:42.039853", "step": 6508, "epoch": 3 }, { "type": "loss", "content": 0.0003358405956532806, "timestamp": "2025-09-10 02:29:42.043031", "step": 6509, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:29:42.078894", "step": 6509, "epoch": 3 }, { "type": "loss", "content": 0.00010202966223005205, "timestamp": "2025-09-10 02:29:42.081544", "step": 6510, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:29:42.112909", "step": 6510, "epoch": 3 }, { "type": "loss", "content": 5.8475307014305145e-05, "timestamp": "2025-09-10 02:29:42.117315", "step": 6511, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:29:42.148295", "step": 6511, "epoch": 3 }, { "type": "loss", "content": 0.0015812184428796172, "timestamp": "2025-09-10 02:29:42.176703", "step": 6512, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:29:42.209147", "step": 6512, "epoch": 3 }, { "type": "loss", "content": 0.000630614347755909, "timestamp": "2025-09-10 02:29:42.213683", "step": 6513, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:29:42.246530", "step": 6513, "epoch": 3 }, { "type": "loss", "content": 0.002638591453433037, "timestamp": "2025-09-10 02:29:42.258549", "step": 6514, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:29:42.292876", "step": 6514, "epoch": 3 }, { "type": "loss", "content": 0.0018498777644708753, "timestamp": "2025-09-10 02:29:42.306207", "step": 6515, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:29:42.345000", "step": 6515, "epoch": 3 }, { "type": "loss", "content": 0.0014997952384874225, "timestamp": "2025-09-10 02:29:42.379266", "step": 6516, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:29:42.423451", "step": 6516, "epoch": 3 }, { "type": "loss", "content": 5.7881530665326864e-05, "timestamp": "2025-09-10 02:29:42.426209", "step": 6517, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:29:42.460619", "step": 6517, "epoch": 3 }, { "type": "loss", "content": 0.0023194043897092342, "timestamp": "2025-09-10 02:29:42.464969", "step": 6518, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:29:42.495158", "step": 6518, "epoch": 3 }, { "type": "loss", "content": 0.0002513010986149311, "timestamp": "2025-09-10 02:29:42.499820", "step": 6519, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:29:42.530931", "step": 6519, "epoch": 3 }, { "type": "loss", "content": 9.767297888174653e-05, "timestamp": "2025-09-10 02:29:42.559204", "step": 6520, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:29:42.592773", "step": 6520, "epoch": 3 }, { "type": "loss", "content": 0.0012455906253308058, "timestamp": "2025-09-10 02:29:42.605309", "step": 6521, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:29:42.638972", "step": 6521, "epoch": 3 }, { "type": "loss", "content": 0.00012123944179620594, "timestamp": "2025-09-10 02:29:42.644016", "step": 6522, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:29:42.700829", "step": 6522, "epoch": 3 }, { "type": "loss", "content": 0.00010264220327371731, "timestamp": "2025-09-10 02:29:42.711735", "step": 6523, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:29:42.743875", "step": 6523, "epoch": 3 }, { "type": "loss", "content": 0.00013027484237682074, "timestamp": "2025-09-10 02:29:42.774849", "step": 6524, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:29:42.829251", "step": 6524, "epoch": 3 }, { "type": "loss", "content": 6.051839955034666e-05, "timestamp": "2025-09-10 02:29:42.830918", "step": 6525, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:29:42.863231", "step": 6525, "epoch": 3 }, { "type": "loss", "content": 0.00017694670532364398, "timestamp": "2025-09-10 02:29:42.875174", "step": 6526, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:29:42.911744", "step": 6526, "epoch": 3 }, { "type": "loss", "content": 0.0001635013904888183, "timestamp": "2025-09-10 02:29:42.918386", "step": 6527, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:29:42.951115", "step": 6527, "epoch": 3 }, { "type": "loss", "content": 0.000155866306158714, "timestamp": "2025-09-10 02:29:42.981872", "step": 6528, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:29:43.018101", "step": 6528, "epoch": 3 }, { "type": "loss", "content": 0.00020132049394305795, "timestamp": "2025-09-10 02:29:43.027472", "step": 6529, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 144 ], "flops": 4271696270016 }, "timestamp": "2025-09-10 02:29:43.059876", "step": 6529, "epoch": 3 }, { "type": "loss", "content": 0.0001669298653723672, "timestamp": "2025-09-10 02:29:43.064357", "step": 6530, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:29:43.106956", "step": 6530, "epoch": 3 }, { "type": "loss", "content": 9.987391968024895e-05, "timestamp": "2025-09-10 02:29:43.120572", "step": 6531, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:29:43.161837", "step": 6531, "epoch": 3 }, { "type": "loss", "content": 6.64821855025366e-05, "timestamp": "2025-09-10 02:29:43.192643", "step": 6532, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:29:43.233736", "step": 6532, "epoch": 3 }, { "type": "loss", "content": 0.00035039763315580785, "timestamp": "2025-09-10 02:29:43.238128", "step": 6533, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:29:43.275737", "step": 6533, "epoch": 3 }, { "type": "loss", "content": 0.00019631556642707437, "timestamp": "2025-09-10 02:29:43.286331", "step": 6534, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:29:43.321424", "step": 6534, "epoch": 3 }, { "type": "loss", "content": 0.004502573050558567, "timestamp": "2025-09-10 02:29:43.328363", "step": 6535, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-10 02:29:43.367093", "step": 6535, "epoch": 3 }, { "type": "loss", "content": 8.936825179262087e-05, "timestamp": "2025-09-10 02:29:43.401939", "step": 6536, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:29:43.444500", "step": 6536, "epoch": 3 }, { "type": "loss", "content": 0.0012994735734537244, "timestamp": "2025-09-10 02:29:43.450004", "step": 6537, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:29:43.488581", "step": 6537, "epoch": 3 }, { "type": "loss", "content": 0.00024626238155178726, "timestamp": "2025-09-10 02:29:43.500954", "step": 6538, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:29:43.537888", "step": 6538, "epoch": 3 }, { "type": "loss", "content": 0.0011256147408857942, "timestamp": "2025-09-10 02:29:43.544647", "step": 6539, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:29:43.582021", "step": 6539, "epoch": 3 }, { "type": "loss", "content": 0.0003471332311164588, "timestamp": "2025-09-10 02:29:43.610260", "step": 6540, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:29:43.648185", "step": 6540, "epoch": 3 }, { "type": "loss", "content": 0.0005624201148748398, "timestamp": "2025-09-10 02:29:43.652530", "step": 6541, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 144 ], "flops": 4271696270016 }, "timestamp": "2025-09-10 02:29:43.685826", "step": 6541, "epoch": 3 }, { "type": "loss", "content": 0.000520360074006021, "timestamp": "2025-09-10 02:29:43.688755", "step": 6542, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:29:43.728439", "step": 6542, "epoch": 3 }, { "type": "loss", "content": 0.0008386308327317238, "timestamp": "2025-09-10 02:29:43.735366", "step": 6543, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:29:43.775439", "step": 6543, "epoch": 3 }, { "type": "loss", "content": 0.0001823129568947479, "timestamp": "2025-09-10 02:29:43.806352", "step": 6544, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:29:43.839287", "step": 6544, "epoch": 3 }, { "type": "loss", "content": 3.192834265064448e-05, "timestamp": "2025-09-10 02:29:43.843723", "step": 6545, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:29:43.875102", "step": 6545, "epoch": 3 }, { "type": "loss", "content": 0.0004066001274622977, "timestamp": "2025-09-10 02:29:43.879468", "step": 6546, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:29:43.911348", "step": 6546, "epoch": 3 }, { "type": "loss", "content": 0.005676894914358854, "timestamp": "2025-09-10 02:29:43.915314", "step": 6547, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:29:43.949276", "step": 6547, "epoch": 3 }, { "type": "loss", "content": 0.0017382523510605097, "timestamp": "2025-09-10 02:29:43.977822", "step": 6548, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:29:44.008971", "step": 6548, "epoch": 3 }, { "type": "loss", "content": 0.00020217923156451434, "timestamp": "2025-09-10 02:29:44.014497", "step": 6549, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:29:44.047133", "step": 6549, "epoch": 3 }, { "type": "loss", "content": 0.0010007356759160757, "timestamp": "2025-09-10 02:29:44.057103", "step": 6550, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:29:44.093005", "step": 6550, "epoch": 3 }, { "type": "loss", "content": 0.0002772933221422136, "timestamp": "2025-09-10 02:29:44.100364", "step": 6551, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:29:44.132702", "step": 6551, "epoch": 3 }, { "type": "loss", "content": 8.65510301082395e-05, "timestamp": "2025-09-10 02:29:44.165536", "step": 6552, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:29:44.199627", "step": 6552, "epoch": 3 }, { "type": "loss", "content": 0.00012201262870803475, "timestamp": "2025-09-10 02:29:44.207311", "step": 6553, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:29:44.241883", "step": 6553, "epoch": 3 }, { "type": "loss", "content": 2.7427620807429776e-05, "timestamp": "2025-09-10 02:29:44.253642", "step": 6554, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:29:44.289381", "step": 6554, "epoch": 3 }, { "type": "loss", "content": 0.0004760746378451586, "timestamp": "2025-09-10 02:29:44.293433", "step": 6555, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:29:44.326167", "step": 6555, "epoch": 3 }, { "type": "loss", "content": 5.913670247537084e-05, "timestamp": "2025-09-10 02:29:44.354407", "step": 6556, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:29:44.384952", "step": 6556, "epoch": 3 }, { "type": "loss", "content": 0.0003855906252283603, "timestamp": "2025-09-10 02:29:44.390366", "step": 6557, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:29:44.421919", "step": 6557, "epoch": 3 }, { "type": "loss", "content": 0.0006527866353280842, "timestamp": "2025-09-10 02:29:44.429692", "step": 6558, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:29:44.460396", "step": 6558, "epoch": 3 }, { "type": "loss", "content": 0.0004126446438021958, "timestamp": "2025-09-10 02:29:44.467105", "step": 6559, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:29:44.499030", "step": 6559, "epoch": 3 }, { "type": "loss", "content": 0.00010248164471704513, "timestamp": "2025-09-10 02:29:44.527617", "step": 6560, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:29:44.582600", "step": 6560, "epoch": 3 }, { "type": "loss", "content": 2.3595810489496216e-05, "timestamp": "2025-09-10 02:29:44.590845", "step": 6561, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:29:44.647250", "step": 6561, "epoch": 3 }, { "type": "loss", "content": 3.688699143822305e-05, "timestamp": "2025-09-10 02:29:44.660980", "step": 6562, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:29:44.699999", "step": 6562, "epoch": 3 }, { "type": "loss", "content": 0.00012046539632137865, "timestamp": "2025-09-10 02:29:44.706900", "step": 6563, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:29:44.742152", "step": 6563, "epoch": 3 }, { "type": "loss", "content": 9.20097081689164e-05, "timestamp": "2025-09-10 02:29:44.778567", "step": 6564, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:29:44.867801", "step": 6564, "epoch": 3 }, { "type": "loss", "content": 7.921749056549743e-05, "timestamp": "2025-09-10 02:29:44.872821", "step": 6565, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-10 02:29:44.939179", "step": 6565, "epoch": 3 }, { "type": "loss", "content": 0.0032475763000547886, "timestamp": "2025-09-10 02:29:44.955024", "step": 6566, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 848 ], "flops": 25154260214720 }, "timestamp": "2025-09-10 02:29:45.056353", "step": 6566, "epoch": 3 }, { "type": "loss", "content": 8.683669875608757e-05, "timestamp": "2025-09-10 02:29:45.085999", "step": 6567, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:29:45.121927", "step": 6567, "epoch": 3 }, { "type": "loss", "content": 7.070793799357489e-05, "timestamp": "2025-09-10 02:29:45.155401", "step": 6568, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:29:45.186619", "step": 6568, "epoch": 3 }, { "type": "loss", "content": 0.0010982885723933578, "timestamp": "2025-09-10 02:29:45.189689", "step": 6569, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:29:45.223202", "step": 6569, "epoch": 3 }, { "type": "loss", "content": 7.731281948508695e-05, "timestamp": "2025-09-10 02:29:45.227551", "step": 6570, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:29:45.259253", "step": 6570, "epoch": 3 }, { "type": "loss", "content": 0.00015924021136015654, "timestamp": "2025-09-10 02:29:45.269427", "step": 6571, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:29:45.306151", "step": 6571, "epoch": 3 }, { "type": "loss", "content": 0.00023259581939782947, "timestamp": "2025-09-10 02:29:45.333773", "step": 6572, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:29:45.367653", "step": 6572, "epoch": 3 }, { "type": "loss", "content": 0.0009659113129600883, "timestamp": "2025-09-10 02:29:45.380672", "step": 6573, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-10 02:29:45.416456", "step": 6573, "epoch": 3 }, { "type": "loss", "content": 0.0002564275055192411, "timestamp": "2025-09-10 02:29:45.430411", "step": 6574, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:29:45.462075", "step": 6574, "epoch": 3 }, { "type": "loss", "content": 8.740082557778805e-05, "timestamp": "2025-09-10 02:29:45.466099", "step": 6575, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 512 ], "flops": 15187581968384 }, "timestamp": "2025-09-10 02:29:45.509626", "step": 6575, "epoch": 3 }, { "type": "loss", "content": 0.00016587102436460555, "timestamp": "2025-09-10 02:29:45.548209", "step": 6576, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 432 ], "flops": 12814563338304 }, "timestamp": "2025-09-10 02:29:45.591561", "step": 6576, "epoch": 3 }, { "type": "loss", "content": 0.0008845412521623075, "timestamp": "2025-09-10 02:29:45.607247", "step": 6577, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:29:45.645434", "step": 6577, "epoch": 3 }, { "type": "loss", "content": 0.017135100439190865, "timestamp": "2025-09-10 02:29:45.649484", "step": 6578, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:29:45.685410", "step": 6578, "epoch": 3 }, { "type": "loss", "content": 0.005552711896598339, "timestamp": "2025-09-10 02:29:45.697410", "step": 6579, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:29:45.730471", "step": 6579, "epoch": 3 }, { "type": "loss", "content": 5.742481516790576e-05, "timestamp": "2025-09-10 02:29:45.766625", "step": 6580, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:29:45.802511", "step": 6580, "epoch": 3 }, { "type": "loss", "content": 0.00011518421524669975, "timestamp": "2025-09-10 02:29:45.810931", "step": 6581, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:29:45.844646", "step": 6581, "epoch": 3 }, { "type": "loss", "content": 0.02741926722228527, "timestamp": "2025-09-10 02:29:45.855494", "step": 6582, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:29:45.888579", "step": 6582, "epoch": 3 }, { "type": "loss", "content": 0.0002638068108353764, "timestamp": "2025-09-10 02:29:45.895389", "step": 6583, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:29:45.934941", "step": 6583, "epoch": 3 }, { "type": "loss", "content": 7.82187344157137e-05, "timestamp": "2025-09-10 02:29:45.969171", "step": 6584, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:29:46.009699", "step": 6584, "epoch": 3 }, { "type": "loss", "content": 0.00043393290252424777, "timestamp": "2025-09-10 02:29:46.014232", "step": 6585, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:29:46.054569", "step": 6585, "epoch": 3 }, { "type": "loss", "content": 0.0005896832444705069, "timestamp": "2025-09-10 02:29:46.063670", "step": 6586, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:29:46.097949", "step": 6586, "epoch": 3 }, { "type": "loss", "content": 2.4004228180274367e-05, "timestamp": "2025-09-10 02:29:46.104779", "step": 6587, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 128 ], "flops": 3797092544000 }, "timestamp": "2025-09-10 02:29:46.143132", "step": 6587, "epoch": 3 }, { "type": "loss", "content": 4.16260190831963e-05, "timestamp": "2025-09-10 02:29:46.167051", "step": 6588, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-10 02:29:46.202964", "step": 6588, "epoch": 3 }, { "type": "loss", "content": 0.08385879546403885, "timestamp": "2025-09-10 02:29:46.218140", "step": 6589, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:29:46.253903", "step": 6589, "epoch": 3 }, { "type": "loss", "content": 4.120770245208405e-05, "timestamp": "2025-09-10 02:29:46.258184", "step": 6590, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:29:46.292173", "step": 6590, "epoch": 3 }, { "type": "loss", "content": 0.0004548307042568922, "timestamp": "2025-09-10 02:29:46.304343", "step": 6591, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:29:46.337290", "step": 6591, "epoch": 3 }, { "type": "loss", "content": 0.0008054524078033864, "timestamp": "2025-09-10 02:29:46.367624", "step": 6592, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:29:46.403516", "step": 6592, "epoch": 3 }, { "type": "loss", "content": 0.000291215896140784, "timestamp": "2025-09-10 02:29:46.407847", "step": 6593, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-10 02:29:46.446194", "step": 6593, "epoch": 3 }, { "type": "loss", "content": 2.139638854714576e-05, "timestamp": "2025-09-10 02:29:46.461869", "step": 6594, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:29:46.494685", "step": 6594, "epoch": 3 }, { "type": "loss", "content": 2.8166157790110447e-05, "timestamp": "2025-09-10 02:29:46.502312", "step": 6595, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:29:46.533636", "step": 6595, "epoch": 3 }, { "type": "loss", "content": 0.0057443794794380665, "timestamp": "2025-09-10 02:29:46.561742", "step": 6596, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:29:46.595237", "step": 6596, "epoch": 3 }, { "type": "loss", "content": 9.872858208836988e-05, "timestamp": "2025-09-10 02:29:46.608219", "step": 6597, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:29:46.639652", "step": 6597, "epoch": 3 }, { "type": "loss", "content": 0.00032129278406500816, "timestamp": "2025-09-10 02:29:46.646828", "step": 6598, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:29:46.678399", "step": 6598, "epoch": 3 }, { "type": "loss", "content": 5.182164386496879e-05, "timestamp": "2025-09-10 02:29:46.685105", "step": 6599, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:29:46.720441", "step": 6599, "epoch": 3 }, { "type": "loss", "content": 0.0004248176119290292, "timestamp": "2025-09-10 02:29:46.753685", "step": 6600, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:29:46.793074", "step": 6600, "epoch": 3 }, { "type": "loss", "content": 5.2954368584323674e-05, "timestamp": "2025-09-10 02:29:46.797228", "step": 6601, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:29:46.828902", "step": 6601, "epoch": 3 }, { "type": "loss", "content": 6.363394641084597e-05, "timestamp": "2025-09-10 02:29:46.835986", "step": 6602, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:29:46.871029", "step": 6602, "epoch": 3 }, { "type": "loss", "content": 0.0003344352007843554, "timestamp": "2025-09-10 02:29:46.878236", "step": 6603, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:29:46.912555", "step": 6603, "epoch": 3 }, { "type": "loss", "content": 0.0003345024597365409, "timestamp": "2025-09-10 02:29:46.940194", "step": 6604, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:29:46.975062", "step": 6604, "epoch": 3 }, { "type": "loss", "content": 0.00013073600712232292, "timestamp": "2025-09-10 02:29:46.980181", "step": 6605, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:29:47.011831", "step": 6605, "epoch": 3 }, { "type": "loss", "content": 0.00012158307799836621, "timestamp": "2025-09-10 02:29:47.018238", "step": 6606, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:29:47.050475", "step": 6606, "epoch": 3 }, { "type": "loss", "content": 0.00014575115346815437, "timestamp": "2025-09-10 02:29:47.059974", "step": 6607, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 656 ], "flops": 19459015502528 }, "timestamp": "2025-09-10 02:29:47.115534", "step": 6607, "epoch": 3 }, { "type": "loss", "content": 0.0006869042408652604, "timestamp": "2025-09-10 02:29:47.159829", "step": 6608, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:29:47.192433", "step": 6608, "epoch": 3 }, { "type": "loss", "content": 0.0005232971161603928, "timestamp": "2025-09-10 02:29:47.200103", "step": 6609, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:29:47.231788", "step": 6609, "epoch": 3 }, { "type": "loss", "content": 0.0002385809930274263, "timestamp": "2025-09-10 02:29:47.238211", "step": 6610, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-10 02:29:47.278395", "step": 6610, "epoch": 3 }, { "type": "loss", "content": 0.00031182175735011697, "timestamp": "2025-09-10 02:29:47.294275", "step": 6611, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:29:47.325939", "step": 6611, "epoch": 3 }, { "type": "loss", "content": 9.482206223765388e-05, "timestamp": "2025-09-10 02:29:47.353366", "step": 6612, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:29:47.387214", "step": 6612, "epoch": 3 }, { "type": "loss", "content": 0.0003503487096168101, "timestamp": "2025-09-10 02:29:47.392229", "step": 6613, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:29:47.423378", "step": 6613, "epoch": 3 }, { "type": "loss", "content": 0.00023075289209373295, "timestamp": "2025-09-10 02:29:47.435195", "step": 6614, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-10 02:29:47.470337", "step": 6614, "epoch": 3 }, { "type": "loss", "content": 7.644097786396742e-05, "timestamp": "2025-09-10 02:29:47.484129", "step": 6615, "epoch": 3 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 736 ], "batch_size": 8, "flops": 14554433988352 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 480 ], "batch_size": 8, "flops": 9492022189824 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 544 ], "batch_size": 8, "flops": 10757625139456 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 624 ], "batch_size": 8, "flops": 12339628826496 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 576 ], "batch_size": 8, "flops": 11390426614272 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 432 ], "batch_size": 8, "flops": 8542819977600 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 432 ], "batch_size": 8, "flops": 8542819977600 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 128 ], "batch_size": 8, "flops": 2531205966848 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 544 ], "batch_size": 8, "flops": 10757625139456 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 656 ], "batch_size": 8, "flops": 12972430301312 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 128 ], "batch_size": 8, "flops": 2531205966848 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 464 ], "batch_size": 8, "flops": 9175621452416 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 560 ], "batch_size": 8, "flops": 11074025876864 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 576 ], "batch_size": 8, "flops": 11390426614272 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 1168 ], "batch_size": 8, "flops": 23097253898368 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 640 ], "batch_size": 8, "flops": 12656029563904 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 6, 208 ], "batch_size": 8, "flops": 4113209653888 } ], "timestamp": "2025-09-10 02:29:57.697352", "step": 6615, "epoch": 3 }, { "type": "pplx", "content": 23155743.853774708, "timestamp": "2025-09-10 02:29:57.700483", "step": 6615, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:29:57.731870", "step": 6615, "epoch": 3 }, { "type": "loss", "content": 0.00013089847925584763, "timestamp": "2025-09-10 02:29:57.763239", "step": 6616, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:29:57.795019", "step": 6616, "epoch": 3 }, { "type": "loss", "content": 0.008317952044308186, "timestamp": "2025-09-10 02:29:57.802667", "step": 6617, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:29:57.836916", "step": 6617, "epoch": 3 }, { "type": "loss", "content": 0.00014241410826798528, "timestamp": "2025-09-10 02:29:57.850624", "step": 6618, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:29:57.882574", "step": 6618, "epoch": 3 }, { "type": "loss", "content": 0.0004595229693222791, "timestamp": "2025-09-10 02:29:57.890144", "step": 6619, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:29:57.920736", "step": 6619, "epoch": 3 }, { "type": "loss", "content": 0.0002367516717640683, "timestamp": "2025-09-10 02:29:57.945896", "step": 6620, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:29:57.976938", "step": 6620, "epoch": 3 }, { "type": "loss", "content": 8.231966057792306e-05, "timestamp": "2025-09-10 02:29:57.987433", "step": 6621, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:29:58.018056", "step": 6621, "epoch": 3 }, { "type": "loss", "content": 0.00013666613085661083, "timestamp": "2025-09-10 02:29:58.028280", "step": 6622, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 144 ], "flops": 4271696270016 }, "timestamp": "2025-09-10 02:29:58.058965", "step": 6622, "epoch": 3 }, { "type": "loss", "content": 0.00022755752434022725, "timestamp": "2025-09-10 02:29:58.061395", "step": 6623, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:29:58.092174", "step": 6623, "epoch": 3 }, { "type": "loss", "content": 4.438480391399935e-05, "timestamp": "2025-09-10 02:29:58.120015", "step": 6624, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:29:58.150543", "step": 6624, "epoch": 3 }, { "type": "loss", "content": 0.00011097739479737356, "timestamp": "2025-09-10 02:29:58.158399", "step": 6625, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:29:58.191879", "step": 6625, "epoch": 3 }, { "type": "loss", "content": 6.087979636504315e-05, "timestamp": "2025-09-10 02:29:58.205310", "step": 6626, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:29:58.236157", "step": 6626, "epoch": 3 }, { "type": "loss", "content": 0.0001310189691139385, "timestamp": "2025-09-10 02:29:58.243858", "step": 6627, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:29:58.273884", "step": 6627, "epoch": 3 }, { "type": "loss", "content": 0.0006077897851355374, "timestamp": "2025-09-10 02:29:58.300002", "step": 6628, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:29:58.330536", "step": 6628, "epoch": 3 }, { "type": "loss", "content": 0.00032031405135057867, "timestamp": "2025-09-10 02:29:58.335086", "step": 6629, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:29:58.369661", "step": 6629, "epoch": 3 }, { "type": "loss", "content": 0.00019072220311500132, "timestamp": "2025-09-10 02:29:58.383352", "step": 6630, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:29:58.416056", "step": 6630, "epoch": 3 }, { "type": "loss", "content": 0.00016121887892950326, "timestamp": "2025-09-10 02:29:58.420014", "step": 6631, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:29:58.450592", "step": 6631, "epoch": 3 }, { "type": "loss", "content": 0.00014108339382801205, "timestamp": "2025-09-10 02:29:58.478324", "step": 6632, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:29:58.510726", "step": 6632, "epoch": 3 }, { "type": "loss", "content": 0.00026426606927998364, "timestamp": "2025-09-10 02:29:58.518657", "step": 6633, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:29:58.549418", "step": 6633, "epoch": 3 }, { "type": "loss", "content": 0.00017428163846489042, "timestamp": "2025-09-10 02:29:58.556254", "step": 6634, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:29:58.587457", "step": 6634, "epoch": 3 }, { "type": "loss", "content": 9.75916045717895e-05, "timestamp": "2025-09-10 02:29:58.595369", "step": 6635, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-10 02:29:58.630012", "step": 6635, "epoch": 3 }, { "type": "loss", "content": 0.0006233084131963551, "timestamp": "2025-09-10 02:29:58.664915", "step": 6636, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:29:58.695685", "step": 6636, "epoch": 3 }, { "type": "loss", "content": 0.04499921575188637, "timestamp": "2025-09-10 02:29:58.700709", "step": 6637, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:29:58.731676", "step": 6637, "epoch": 3 }, { "type": "loss", "content": 0.0001879000337794423, "timestamp": "2025-09-10 02:29:58.734198", "step": 6638, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:29:58.765793", "step": 6638, "epoch": 3 }, { "type": "loss", "content": 0.00037380700814537704, "timestamp": "2025-09-10 02:29:58.777909", "step": 6639, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:29:58.810035", "step": 6639, "epoch": 3 }, { "type": "loss", "content": 0.000758981506805867, "timestamp": "2025-09-10 02:29:58.835096", "step": 6640, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:29:58.866403", "step": 6640, "epoch": 3 }, { "type": "loss", "content": 0.00025593198370188475, "timestamp": "2025-09-10 02:29:58.869376", "step": 6641, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:29:58.902071", "step": 6641, "epoch": 3 }, { "type": "loss", "content": 0.00043907135841436684, "timestamp": "2025-09-10 02:29:58.906026", "step": 6642, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:29:58.939048", "step": 6642, "epoch": 3 }, { "type": "loss", "content": 0.0006986635853536427, "timestamp": "2025-09-10 02:29:58.951100", "step": 6643, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:29:58.982239", "step": 6643, "epoch": 3 }, { "type": "loss", "content": 0.00012294725456740707, "timestamp": "2025-09-10 02:29:59.015713", "step": 6644, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:29:59.048251", "step": 6644, "epoch": 3 }, { "type": "loss", "content": 0.00016030111873988062, "timestamp": "2025-09-10 02:29:59.056301", "step": 6645, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:29:59.087486", "step": 6645, "epoch": 3 }, { "type": "loss", "content": 0.0019082374637946486, "timestamp": "2025-09-10 02:29:59.098455", "step": 6646, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:29:59.130766", "step": 6646, "epoch": 3 }, { "type": "loss", "content": 0.003217429621145129, "timestamp": "2025-09-10 02:29:59.141083", "step": 6647, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:29:59.171745", "step": 6647, "epoch": 3 }, { "type": "loss", "content": 0.0001194212309201248, "timestamp": "2025-09-10 02:29:59.202813", "step": 6648, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:29:59.233711", "step": 6648, "epoch": 3 }, { "type": "loss", "content": 0.00015604333020746708, "timestamp": "2025-09-10 02:29:59.238488", "step": 6649, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:29:59.270048", "step": 6649, "epoch": 3 }, { "type": "loss", "content": 0.0003292102483101189, "timestamp": "2025-09-10 02:29:59.277806", "step": 6650, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:29:59.311348", "step": 6650, "epoch": 3 }, { "type": "loss", "content": 0.0001702476729406044, "timestamp": "2025-09-10 02:29:59.323584", "step": 6651, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:29:59.353991", "step": 6651, "epoch": 3 }, { "type": "loss", "content": 8.551568316761404e-05, "timestamp": "2025-09-10 02:29:59.387131", "step": 6652, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:29:59.417958", "step": 6652, "epoch": 3 }, { "type": "loss", "content": 0.0008532029460184276, "timestamp": "2025-09-10 02:29:59.422673", "step": 6653, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:29:59.454139", "step": 6653, "epoch": 3 }, { "type": "loss", "content": 0.002605307847261429, "timestamp": "2025-09-10 02:29:59.461242", "step": 6654, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:29:59.492600", "step": 6654, "epoch": 3 }, { "type": "loss", "content": 7.880874909460545e-05, "timestamp": "2025-09-10 02:29:59.499552", "step": 6655, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:29:59.530925", "step": 6655, "epoch": 3 }, { "type": "loss", "content": 0.0003353517968207598, "timestamp": "2025-09-10 02:29:59.558839", "step": 6656, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:29:59.590568", "step": 6656, "epoch": 3 }, { "type": "loss", "content": 0.00013173728075344115, "timestamp": "2025-09-10 02:29:59.595098", "step": 6657, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:29:59.625524", "step": 6657, "epoch": 3 }, { "type": "loss", "content": 0.0001572458859300241, "timestamp": "2025-09-10 02:29:59.635684", "step": 6658, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:29:59.667141", "step": 6658, "epoch": 3 }, { "type": "loss", "content": 0.0001322474709013477, "timestamp": "2025-09-10 02:29:59.674006", "step": 6659, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:29:59.705246", "step": 6659, "epoch": 3 }, { "type": "loss", "content": 0.0003668780846055597, "timestamp": "2025-09-10 02:29:59.736437", "step": 6660, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:29:59.767777", "step": 6660, "epoch": 3 }, { "type": "loss", "content": 0.00014766550157219172, "timestamp": "2025-09-10 02:29:59.777501", "step": 6661, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:29:59.808844", "step": 6661, "epoch": 3 }, { "type": "loss", "content": 0.00010263031435897574, "timestamp": "2025-09-10 02:29:59.816341", "step": 6662, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:29:59.846707", "step": 6662, "epoch": 3 }, { "type": "loss", "content": 0.00016308830527123064, "timestamp": "2025-09-10 02:29:59.850690", "step": 6663, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:29:59.880950", "step": 6663, "epoch": 3 }, { "type": "loss", "content": 0.00017855261103250086, "timestamp": "2025-09-10 02:29:59.908755", "step": 6664, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:29:59.940347", "step": 6664, "epoch": 3 }, { "type": "loss", "content": 0.002598909428343177, "timestamp": "2025-09-10 02:29:59.942693", "step": 6665, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:29:59.972688", "step": 6665, "epoch": 3 }, { "type": "loss", "content": 0.03273782879114151, "timestamp": "2025-09-10 02:29:59.980415", "step": 6666, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:30:00.011943", "step": 6666, "epoch": 3 }, { "type": "loss", "content": 0.0009771647164598107, "timestamp": "2025-09-10 02:30:00.022056", "step": 6667, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:30:00.053644", "step": 6667, "epoch": 3 }, { "type": "loss", "content": 0.0001550656888866797, "timestamp": "2025-09-10 02:30:00.081954", "step": 6668, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:30:00.115747", "step": 6668, "epoch": 3 }, { "type": "loss", "content": 6.419160490622744e-05, "timestamp": "2025-09-10 02:30:00.123685", "step": 6669, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:30:00.155047", "step": 6669, "epoch": 3 }, { "type": "loss", "content": 0.0010343643371015787, "timestamp": "2025-09-10 02:30:00.162649", "step": 6670, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-10 02:30:00.197090", "step": 6670, "epoch": 3 }, { "type": "loss", "content": 0.0004840478941332549, "timestamp": "2025-09-10 02:30:00.210903", "step": 6671, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:30:00.246281", "step": 6671, "epoch": 3 }, { "type": "loss", "content": 0.0007365471683442593, "timestamp": "2025-09-10 02:30:00.280494", "step": 6672, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:30:00.312642", "step": 6672, "epoch": 3 }, { "type": "loss", "content": 0.00023997330572456121, "timestamp": "2025-09-10 02:30:00.318091", "step": 6673, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:30:00.349431", "step": 6673, "epoch": 3 }, { "type": "loss", "content": 0.0003005491744261235, "timestamp": "2025-09-10 02:30:00.361669", "step": 6674, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:30:00.392866", "step": 6674, "epoch": 3 }, { "type": "loss", "content": 0.0004848405660595745, "timestamp": "2025-09-10 02:30:00.399901", "step": 6675, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:30:00.431202", "step": 6675, "epoch": 3 }, { "type": "loss", "content": 0.0005252750124782324, "timestamp": "2025-09-10 02:30:00.459830", "step": 6676, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:30:00.491167", "step": 6676, "epoch": 3 }, { "type": "loss", "content": 0.00031378321000374854, "timestamp": "2025-09-10 02:30:00.495509", "step": 6677, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:30:00.527111", "step": 6677, "epoch": 3 }, { "type": "loss", "content": 0.0002828229626175016, "timestamp": "2025-09-10 02:30:00.537098", "step": 6678, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:30:00.568785", "step": 6678, "epoch": 3 }, { "type": "loss", "content": 0.00015684754180256277, "timestamp": "2025-09-10 02:30:00.576183", "step": 6679, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:30:00.606885", "step": 6679, "epoch": 3 }, { "type": "loss", "content": 0.0003511524701025337, "timestamp": "2025-09-10 02:30:00.630735", "step": 6680, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:30:00.662482", "step": 6680, "epoch": 3 }, { "type": "loss", "content": 0.00010571930761216208, "timestamp": "2025-09-10 02:30:00.672718", "step": 6681, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:30:00.705496", "step": 6681, "epoch": 3 }, { "type": "loss", "content": 0.00010457936878083274, "timestamp": "2025-09-10 02:30:00.709653", "step": 6682, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:30:00.740382", "step": 6682, "epoch": 3 }, { "type": "loss", "content": 0.00036585141788236797, "timestamp": "2025-09-10 02:30:00.747032", "step": 6683, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:30:00.782833", "step": 6683, "epoch": 3 }, { "type": "loss", "content": 0.00012327669537626207, "timestamp": "2025-09-10 02:30:00.810543", "step": 6684, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:30:00.841504", "step": 6684, "epoch": 3 }, { "type": "loss", "content": 0.00021850709163118154, "timestamp": "2025-09-10 02:30:00.844142", "step": 6685, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:30:00.875302", "step": 6685, "epoch": 3 }, { "type": "loss", "content": 0.00018914879183284938, "timestamp": "2025-09-10 02:30:00.887630", "step": 6686, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 144 ], "flops": 4271696270016 }, "timestamp": "2025-09-10 02:30:00.918893", "step": 6686, "epoch": 3 }, { "type": "loss", "content": 0.0001374257553834468, "timestamp": "2025-09-10 02:30:00.921422", "step": 6687, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-10 02:30:00.957894", "step": 6687, "epoch": 3 }, { "type": "loss", "content": 0.00024864732404239476, "timestamp": "2025-09-10 02:30:00.992738", "step": 6688, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:30:01.024373", "step": 6688, "epoch": 3 }, { "type": "loss", "content": 0.009503054432570934, "timestamp": "2025-09-10 02:30:01.028078", "step": 6689, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:30:01.063225", "step": 6689, "epoch": 3 }, { "type": "loss", "content": 0.0002550124190747738, "timestamp": "2025-09-10 02:30:01.070513", "step": 6690, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 432 ], "flops": 12814563338304 }, "timestamp": "2025-09-10 02:30:01.109691", "step": 6690, "epoch": 3 }, { "type": "loss", "content": 0.0004109316796530038, "timestamp": "2025-09-10 02:30:01.125868", "step": 6691, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:30:01.157073", "step": 6691, "epoch": 3 }, { "type": "loss", "content": 0.00036797040957026184, "timestamp": "2025-09-10 02:30:01.185883", "step": 6692, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:30:01.216983", "step": 6692, "epoch": 3 }, { "type": "loss", "content": 0.00011126509343739599, "timestamp": "2025-09-10 02:30:01.224967", "step": 6693, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:30:01.257817", "step": 6693, "epoch": 3 }, { "type": "loss", "content": 7.454932347172871e-05, "timestamp": "2025-09-10 02:30:01.265509", "step": 6694, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:30:01.296459", "step": 6694, "epoch": 3 }, { "type": "loss", "content": 0.00021754649060312659, "timestamp": "2025-09-10 02:30:01.303768", "step": 6695, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:30:01.334621", "step": 6695, "epoch": 3 }, { "type": "loss", "content": 0.00014526637096423656, "timestamp": "2025-09-10 02:30:01.358375", "step": 6696, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 448 ], "flops": 13289167064320 }, "timestamp": "2025-09-10 02:30:01.396064", "step": 6696, "epoch": 3 }, { "type": "loss", "content": 0.0002652324619702995, "timestamp": "2025-09-10 02:30:01.411973", "step": 6697, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:30:01.443004", "step": 6697, "epoch": 3 }, { "type": "loss", "content": 0.00011327861284371465, "timestamp": "2025-09-10 02:30:01.449826", "step": 6698, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:30:01.481727", "step": 6698, "epoch": 3 }, { "type": "loss", "content": 0.00028877213480882347, "timestamp": "2025-09-10 02:30:01.489092", "step": 6699, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:30:01.520717", "step": 6699, "epoch": 3 }, { "type": "loss", "content": 0.00022058105969335884, "timestamp": "2025-09-10 02:30:01.546093", "step": 6700, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:30:01.577682", "step": 6700, "epoch": 3 }, { "type": "loss", "content": 0.0002654526033438742, "timestamp": "2025-09-10 02:30:01.582188", "step": 6701, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:30:01.615409", "step": 6701, "epoch": 3 }, { "type": "loss", "content": 0.00038097609649412334, "timestamp": "2025-09-10 02:30:01.622347", "step": 6702, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:30:01.653294", "step": 6702, "epoch": 3 }, { "type": "loss", "content": 0.00023465848062187433, "timestamp": "2025-09-10 02:30:01.660185", "step": 6703, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:30:01.692737", "step": 6703, "epoch": 3 }, { "type": "loss", "content": 0.00016869421233423054, "timestamp": "2025-09-10 02:30:01.723948", "step": 6704, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:30:01.755326", "step": 6704, "epoch": 3 }, { "type": "loss", "content": 0.0002982628939207643, "timestamp": "2025-09-10 02:30:01.760029", "step": 6705, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:30:01.796033", "step": 6705, "epoch": 3 }, { "type": "loss", "content": 0.00042490853229537606, "timestamp": "2025-09-10 02:30:01.808562", "step": 6706, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:30:01.842911", "step": 6706, "epoch": 3 }, { "type": "loss", "content": 0.00036351257585920393, "timestamp": "2025-09-10 02:30:01.847262", "step": 6707, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:30:01.878290", "step": 6707, "epoch": 3 }, { "type": "loss", "content": 0.00016362879250664264, "timestamp": "2025-09-10 02:30:01.906042", "step": 6708, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:30:01.937236", "step": 6708, "epoch": 3 }, { "type": "loss", "content": 0.00014777052274439484, "timestamp": "2025-09-10 02:30:01.942279", "step": 6709, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:30:01.973464", "step": 6709, "epoch": 3 }, { "type": "loss", "content": 0.00013149731967132539, "timestamp": "2025-09-10 02:30:01.979403", "step": 6710, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:30:02.020841", "step": 6710, "epoch": 3 }, { "type": "loss", "content": 9.87174644251354e-05, "timestamp": "2025-09-10 02:30:02.028460", "step": 6711, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:30:02.059923", "step": 6711, "epoch": 3 }, { "type": "loss", "content": 9.164093353319913e-05, "timestamp": "2025-09-10 02:30:02.097837", "step": 6712, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:30:02.163435", "step": 6712, "epoch": 3 }, { "type": "loss", "content": 5.91975731367711e-05, "timestamp": "2025-09-10 02:30:02.171839", "step": 6713, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:30:02.215126", "step": 6713, "epoch": 3 }, { "type": "loss", "content": 0.00025522714713588357, "timestamp": "2025-09-10 02:30:02.227085", "step": 6714, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:30:02.266867", "step": 6714, "epoch": 3 }, { "type": "loss", "content": 0.0016006106743589044, "timestamp": "2025-09-10 02:30:02.279452", "step": 6715, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:30:02.321903", "step": 6715, "epoch": 3 }, { "type": "loss", "content": 5.307100946083665e-05, "timestamp": "2025-09-10 02:30:02.346711", "step": 6716, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:30:02.381968", "step": 6716, "epoch": 3 }, { "type": "loss", "content": 9.852695802692324e-05, "timestamp": "2025-09-10 02:30:02.384511", "step": 6717, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:30:02.418468", "step": 6717, "epoch": 3 }, { "type": "loss", "content": 0.0003025185433216393, "timestamp": "2025-09-10 02:30:02.425355", "step": 6718, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:30:02.456597", "step": 6718, "epoch": 3 }, { "type": "loss", "content": 0.00015510991215705872, "timestamp": "2025-09-10 02:30:02.469113", "step": 6719, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:30:02.500955", "step": 6719, "epoch": 3 }, { "type": "loss", "content": 0.0001259516429854557, "timestamp": "2025-09-10 02:30:02.528547", "step": 6720, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:30:02.559964", "step": 6720, "epoch": 3 }, { "type": "loss", "content": 0.0008776256581768394, "timestamp": "2025-09-10 02:30:02.562312", "step": 6721, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:30:02.594792", "step": 6721, "epoch": 3 }, { "type": "loss", "content": 7.6968630310148e-05, "timestamp": "2025-09-10 02:30:02.605651", "step": 6722, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:30:02.638245", "step": 6722, "epoch": 3 }, { "type": "loss", "content": 0.000490417645778507, "timestamp": "2025-09-10 02:30:02.648085", "step": 6723, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:30:02.679494", "step": 6723, "epoch": 3 }, { "type": "loss", "content": 0.0023274854756891727, "timestamp": "2025-09-10 02:30:02.708135", "step": 6724, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 144 ], "flops": 4271696270016 }, "timestamp": "2025-09-10 02:30:02.738780", "step": 6724, "epoch": 3 }, { "type": "loss", "content": 7.722565351286903e-05, "timestamp": "2025-09-10 02:30:02.741949", "step": 6725, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:30:02.775968", "step": 6725, "epoch": 3 }, { "type": "loss", "content": 0.0001424977817805484, "timestamp": "2025-09-10 02:30:02.782594", "step": 6726, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:30:02.814355", "step": 6726, "epoch": 3 }, { "type": "loss", "content": 0.0002022543194470927, "timestamp": "2025-09-10 02:30:02.820951", "step": 6727, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:30:02.852914", "step": 6727, "epoch": 3 }, { "type": "loss", "content": 0.0003710582968778908, "timestamp": "2025-09-10 02:30:02.876861", "step": 6728, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:30:02.909251", "step": 6728, "epoch": 3 }, { "type": "loss", "content": 0.00013393805420491844, "timestamp": "2025-09-10 02:30:02.911317", "step": 6729, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:30:02.942365", "step": 6729, "epoch": 3 }, { "type": "loss", "content": 0.00025334549718536437, "timestamp": "2025-09-10 02:30:02.949941", "step": 6730, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:30:02.981314", "step": 6730, "epoch": 3 }, { "type": "loss", "content": 0.0015375103102996945, "timestamp": "2025-09-10 02:30:02.985634", "step": 6731, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:30:03.016998", "step": 6731, "epoch": 3 }, { "type": "loss", "content": 0.00120832200627774, "timestamp": "2025-09-10 02:30:03.045410", "step": 6732, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:30:03.078789", "step": 6732, "epoch": 3 }, { "type": "loss", "content": 0.0019324652384966612, "timestamp": "2025-09-10 02:30:03.081264", "step": 6733, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:30:03.112557", "step": 6733, "epoch": 3 }, { "type": "loss", "content": 0.00045514092198573053, "timestamp": "2025-09-10 02:30:03.115146", "step": 6734, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:30:03.146534", "step": 6734, "epoch": 3 }, { "type": "loss", "content": 0.0002553352096583694, "timestamp": "2025-09-10 02:30:03.157281", "step": 6735, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:30:03.192725", "step": 6735, "epoch": 3 }, { "type": "loss", "content": 0.00010565890261204913, "timestamp": "2025-09-10 02:30:03.227326", "step": 6736, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:30:03.262594", "step": 6736, "epoch": 3 }, { "type": "loss", "content": 0.00019748820341192186, "timestamp": "2025-09-10 02:30:03.275653", "step": 6737, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:30:03.308506", "step": 6737, "epoch": 3 }, { "type": "loss", "content": 0.00015901295410003513, "timestamp": "2025-09-10 02:30:03.315595", "step": 6738, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:30:03.356029", "step": 6738, "epoch": 3 }, { "type": "loss", "content": 0.00040654095937497914, "timestamp": "2025-09-10 02:30:03.369737", "step": 6739, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:30:03.401756", "step": 6739, "epoch": 3 }, { "type": "loss", "content": 7.327982893912122e-05, "timestamp": "2025-09-10 02:30:03.430063", "step": 6740, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:30:03.465677", "step": 6740, "epoch": 3 }, { "type": "loss", "content": 0.001900206902064383, "timestamp": "2025-09-10 02:30:03.472042", "step": 6741, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:30:03.509158", "step": 6741, "epoch": 3 }, { "type": "loss", "content": 0.00012812459317501634, "timestamp": "2025-09-10 02:30:03.516098", "step": 6742, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:30:03.547366", "step": 6742, "epoch": 3 }, { "type": "loss", "content": 0.00014215106784831733, "timestamp": "2025-09-10 02:30:03.551339", "step": 6743, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:30:03.583264", "step": 6743, "epoch": 3 }, { "type": "loss", "content": 0.0015154675347730517, "timestamp": "2025-09-10 02:30:03.608006", "step": 6744, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:30:03.639794", "step": 6744, "epoch": 3 }, { "type": "loss", "content": 0.00048416477511636913, "timestamp": "2025-09-10 02:30:03.645121", "step": 6745, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:30:03.680280", "step": 6745, "epoch": 3 }, { "type": "loss", "content": 0.0016691208584234118, "timestamp": "2025-09-10 02:30:03.693642", "step": 6746, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:30:03.726243", "step": 6746, "epoch": 3 }, { "type": "loss", "content": 0.0012729717418551445, "timestamp": "2025-09-10 02:30:03.733692", "step": 6747, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:30:03.766399", "step": 6747, "epoch": 3 }, { "type": "loss", "content": 6.610819400520995e-05, "timestamp": "2025-09-10 02:30:03.799709", "step": 6748, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:30:03.831631", "step": 6748, "epoch": 3 }, { "type": "loss", "content": 0.0004302055749576539, "timestamp": "2025-09-10 02:30:03.836191", "step": 6749, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 528 ], "flops": 15662185694400 }, "timestamp": "2025-09-10 02:30:03.882183", "step": 6749, "epoch": 3 }, { "type": "loss", "content": 0.00018864336016122252, "timestamp": "2025-09-10 02:30:03.901351", "step": 6750, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:30:03.933412", "step": 6750, "epoch": 3 }, { "type": "loss", "content": 0.00017309685063082725, "timestamp": "2025-09-10 02:30:03.940633", "step": 6751, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:30:03.974347", "step": 6751, "epoch": 3 }, { "type": "loss", "content": 6.622447835979983e-05, "timestamp": "2025-09-10 02:30:04.008600", "step": 6752, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:30:04.039796", "step": 6752, "epoch": 3 }, { "type": "loss", "content": 0.0003488397051114589, "timestamp": "2025-09-10 02:30:04.044117", "step": 6753, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:30:04.076402", "step": 6753, "epoch": 3 }, { "type": "loss", "content": 0.00013236506492830813, "timestamp": "2025-09-10 02:30:04.083647", "step": 6754, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:30:04.115194", "step": 6754, "epoch": 3 }, { "type": "loss", "content": 0.00018496920529287308, "timestamp": "2025-09-10 02:30:04.122414", "step": 6755, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-10 02:30:04.157677", "step": 6755, "epoch": 3 }, { "type": "loss", "content": 0.00016140654042828828, "timestamp": "2025-09-10 02:30:04.192428", "step": 6756, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:30:04.224380", "step": 6756, "epoch": 3 }, { "type": "loss", "content": 6.932354153832421e-05, "timestamp": "2025-09-10 02:30:04.228472", "step": 6757, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-10 02:30:04.268473", "step": 6757, "epoch": 3 }, { "type": "loss", "content": 0.0006969981477595866, "timestamp": "2025-09-10 02:30:04.284320", "step": 6758, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:30:04.320015", "step": 6758, "epoch": 3 }, { "type": "loss", "content": 6.715174822602421e-05, "timestamp": "2025-09-10 02:30:04.333442", "step": 6759, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:30:04.366619", "step": 6759, "epoch": 3 }, { "type": "loss", "content": 0.00027297786436975, "timestamp": "2025-09-10 02:30:04.392481", "step": 6760, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:30:04.422941", "step": 6760, "epoch": 3 }, { "type": "loss", "content": 4.975103001925163e-05, "timestamp": "2025-09-10 02:30:04.425156", "step": 6761, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:30:04.456491", "step": 6761, "epoch": 3 }, { "type": "loss", "content": 0.00024496050900779665, "timestamp": "2025-09-10 02:30:04.466667", "step": 6762, "epoch": 3 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 736 ], "batch_size": 8, "flops": 14554433988352 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 480 ], "batch_size": 8, "flops": 9492022189824 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 544 ], "batch_size": 8, "flops": 10757625139456 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 624 ], "batch_size": 8, "flops": 12339628826496 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 576 ], "batch_size": 8, "flops": 11390426614272 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 432 ], "batch_size": 8, "flops": 8542819977600 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 432 ], "batch_size": 8, "flops": 8542819977600 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 128 ], "batch_size": 8, "flops": 2531205966848 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 544 ], "batch_size": 8, "flops": 10757625139456 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 656 ], "batch_size": 8, "flops": 12972430301312 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 128 ], "batch_size": 8, "flops": 2531205966848 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 464 ], "batch_size": 8, "flops": 9175621452416 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 560 ], "batch_size": 8, "flops": 11074025876864 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 576 ], "batch_size": 8, "flops": 11390426614272 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 1168 ], "batch_size": 8, "flops": 23097253898368 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 640 ], "batch_size": 8, "flops": 12656029563904 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 6, 208 ], "batch_size": 8, "flops": 4113209653888 } ], "timestamp": "2025-09-10 02:30:15.292289", "step": 6762, "epoch": 3 }, { "type": "pplx", "content": 23855258.0813289, "timestamp": "2025-09-10 02:30:15.295377", "step": 6762, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:30:15.326564", "step": 6762, "epoch": 3 }, { "type": "loss", "content": 0.00013265803863760084, "timestamp": "2025-09-10 02:30:15.329741", "step": 6763, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:30:15.364987", "step": 6763, "epoch": 3 }, { "type": "loss", "content": 0.0006455808761529624, "timestamp": "2025-09-10 02:30:15.393246", "step": 6764, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:30:15.425460", "step": 6764, "epoch": 3 }, { "type": "loss", "content": 0.0010767403291538358, "timestamp": "2025-09-10 02:30:15.438173", "step": 6765, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:30:15.469805", "step": 6765, "epoch": 3 }, { "type": "loss", "content": 0.00016831964603625238, "timestamp": "2025-09-10 02:30:15.477267", "step": 6766, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:30:15.540508", "step": 6766, "epoch": 3 }, { "type": "loss", "content": 7.969191210577264e-05, "timestamp": "2025-09-10 02:30:15.547193", "step": 6767, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:30:15.602576", "step": 6767, "epoch": 3 }, { "type": "loss", "content": 0.008119060657918453, "timestamp": "2025-09-10 02:30:15.634471", "step": 6768, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:30:15.686556", "step": 6768, "epoch": 3 }, { "type": "loss", "content": 0.0025774035602808, "timestamp": "2025-09-10 02:30:15.690953", "step": 6769, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:30:15.732130", "step": 6769, "epoch": 3 }, { "type": "loss", "content": 0.0003182920045219362, "timestamp": "2025-09-10 02:30:15.738838", "step": 6770, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:30:15.772239", "step": 6770, "epoch": 3 }, { "type": "loss", "content": 0.00016758790297899395, "timestamp": "2025-09-10 02:30:15.784544", "step": 6771, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:30:15.815462", "step": 6771, "epoch": 3 }, { "type": "loss", "content": 0.0013579537626355886, "timestamp": "2025-09-10 02:30:15.846516", "step": 6772, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:30:15.881090", "step": 6772, "epoch": 3 }, { "type": "loss", "content": 0.0003011829103343189, "timestamp": "2025-09-10 02:30:15.888746", "step": 6773, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:30:15.922440", "step": 6773, "epoch": 3 }, { "type": "loss", "content": 0.0005984340095892549, "timestamp": "2025-09-10 02:30:15.935758", "step": 6774, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:30:15.969843", "step": 6774, "epoch": 3 }, { "type": "loss", "content": 8.118032565107569e-05, "timestamp": "2025-09-10 02:30:15.981573", "step": 6775, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:30:16.014106", "step": 6775, "epoch": 3 }, { "type": "loss", "content": 6.917355494806543e-05, "timestamp": "2025-09-10 02:30:16.041935", "step": 6776, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:30:16.072768", "step": 6776, "epoch": 3 }, { "type": "loss", "content": 6.683034735033289e-05, "timestamp": "2025-09-10 02:30:16.077988", "step": 6777, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:30:16.110417", "step": 6777, "epoch": 3 }, { "type": "loss", "content": 0.0003518997982610017, "timestamp": "2025-09-10 02:30:16.117652", "step": 6778, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:30:16.148684", "step": 6778, "epoch": 3 }, { "type": "loss", "content": 5.0944421673193574e-05, "timestamp": "2025-09-10 02:30:16.151125", "step": 6779, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:30:16.182224", "step": 6779, "epoch": 3 }, { "type": "loss", "content": 0.00011305516818538308, "timestamp": "2025-09-10 02:30:16.210513", "step": 6780, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:30:16.242733", "step": 6780, "epoch": 3 }, { "type": "loss", "content": 0.004298292566090822, "timestamp": "2025-09-10 02:30:16.251720", "step": 6781, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:30:16.282589", "step": 6781, "epoch": 3 }, { "type": "loss", "content": 0.00013062043581157923, "timestamp": "2025-09-10 02:30:16.294634", "step": 6782, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:30:16.329331", "step": 6782, "epoch": 3 }, { "type": "loss", "content": 0.012010819278657436, "timestamp": "2025-09-10 02:30:16.336729", "step": 6783, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:30:16.367635", "step": 6783, "epoch": 3 }, { "type": "loss", "content": 0.00013636215589940548, "timestamp": "2025-09-10 02:30:16.399380", "step": 6784, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:30:16.437640", "step": 6784, "epoch": 3 }, { "type": "loss", "content": 0.00012389972107484937, "timestamp": "2025-09-10 02:30:16.442370", "step": 6785, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:30:16.475988", "step": 6785, "epoch": 3 }, { "type": "loss", "content": 0.0001779487356543541, "timestamp": "2025-09-10 02:30:16.483346", "step": 6786, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:30:16.517989", "step": 6786, "epoch": 3 }, { "type": "loss", "content": 0.001430910429917276, "timestamp": "2025-09-10 02:30:16.524922", "step": 6787, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:30:16.558105", "step": 6787, "epoch": 3 }, { "type": "loss", "content": 8.068051829468459e-05, "timestamp": "2025-09-10 02:30:16.590893", "step": 6788, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:30:16.627308", "step": 6788, "epoch": 3 }, { "type": "loss", "content": 0.00024358855444006622, "timestamp": "2025-09-10 02:30:16.640280", "step": 6789, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 144 ], "flops": 4271696270016 }, "timestamp": "2025-09-10 02:30:16.674026", "step": 6789, "epoch": 3 }, { "type": "loss", "content": 0.0007300904835574329, "timestamp": "2025-09-10 02:30:16.678254", "step": 6790, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 144 ], "flops": 4271696270016 }, "timestamp": "2025-09-10 02:30:16.716899", "step": 6790, "epoch": 3 }, { "type": "loss", "content": 2.7813763153972104e-05, "timestamp": "2025-09-10 02:30:16.719438", "step": 6791, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:30:16.760030", "step": 6791, "epoch": 3 }, { "type": "loss", "content": 8.391224400838837e-05, "timestamp": "2025-09-10 02:30:16.788260", "step": 6792, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:30:16.824861", "step": 6792, "epoch": 3 }, { "type": "loss", "content": 0.0007873232243582606, "timestamp": "2025-09-10 02:30:16.829373", "step": 6793, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 512 ], "flops": 15187581968384 }, "timestamp": "2025-09-10 02:30:16.874610", "step": 6793, "epoch": 3 }, { "type": "loss", "content": 8.269152749562636e-05, "timestamp": "2025-09-10 02:30:16.892285", "step": 6794, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:30:16.929849", "step": 6794, "epoch": 3 }, { "type": "loss", "content": 0.00020102993585169315, "timestamp": "2025-09-10 02:30:16.937347", "step": 6795, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:30:16.970154", "step": 6795, "epoch": 3 }, { "type": "loss", "content": 0.0012203119695186615, "timestamp": "2025-09-10 02:30:16.997874", "step": 6796, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:30:17.029699", "step": 6796, "epoch": 3 }, { "type": "loss", "content": 0.010345556773245335, "timestamp": "2025-09-10 02:30:17.039296", "step": 6797, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:30:17.073566", "step": 6797, "epoch": 3 }, { "type": "loss", "content": 0.007398456335067749, "timestamp": "2025-09-10 02:30:17.077519", "step": 6798, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-10 02:30:17.119440", "step": 6798, "epoch": 3 }, { "type": "loss", "content": 0.0006762798875570297, "timestamp": "2025-09-10 02:30:17.135062", "step": 6799, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:30:17.168051", "step": 6799, "epoch": 3 }, { "type": "loss", "content": 0.00012070146476617083, "timestamp": "2025-09-10 02:30:17.196027", "step": 6800, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:30:17.230664", "step": 6800, "epoch": 3 }, { "type": "loss", "content": 5.54533107788302e-05, "timestamp": "2025-09-10 02:30:17.247016", "step": 6801, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-10 02:30:17.285299", "step": 6801, "epoch": 3 }, { "type": "loss", "content": 6.327310256892815e-05, "timestamp": "2025-09-10 02:30:17.299295", "step": 6802, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:30:17.331893", "step": 6802, "epoch": 3 }, { "type": "loss", "content": 0.030736997723579407, "timestamp": "2025-09-10 02:30:17.334233", "step": 6803, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:30:17.369517", "step": 6803, "epoch": 3 }, { "type": "loss", "content": 0.0001166960719274357, "timestamp": "2025-09-10 02:30:17.402857", "step": 6804, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-10 02:30:17.440516", "step": 6804, "epoch": 3 }, { "type": "loss", "content": 0.00021152500994503498, "timestamp": "2025-09-10 02:30:17.455582", "step": 6805, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:30:17.489705", "step": 6805, "epoch": 3 }, { "type": "loss", "content": 0.00016026229423005134, "timestamp": "2025-09-10 02:30:17.492235", "step": 6806, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-10 02:30:17.531358", "step": 6806, "epoch": 3 }, { "type": "loss", "content": 0.0014069009339436889, "timestamp": "2025-09-10 02:30:17.546926", "step": 6807, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:30:17.581034", "step": 6807, "epoch": 3 }, { "type": "loss", "content": 0.0012990307295694947, "timestamp": "2025-09-10 02:30:17.609309", "step": 6808, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:30:17.642719", "step": 6808, "epoch": 3 }, { "type": "loss", "content": 0.0327615961432457, "timestamp": "2025-09-10 02:30:17.650909", "step": 6809, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:30:17.682485", "step": 6809, "epoch": 3 }, { "type": "loss", "content": 0.0003256215713918209, "timestamp": "2025-09-10 02:30:17.689780", "step": 6810, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:30:17.722498", "step": 6810, "epoch": 3 }, { "type": "loss", "content": 0.00016400113236159086, "timestamp": "2025-09-10 02:30:17.732387", "step": 6811, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:30:17.765986", "step": 6811, "epoch": 3 }, { "type": "loss", "content": 3.386579919606447e-05, "timestamp": "2025-09-10 02:30:17.793719", "step": 6812, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:30:17.829048", "step": 6812, "epoch": 3 }, { "type": "loss", "content": 7.543731771875173e-05, "timestamp": "2025-09-10 02:30:17.838282", "step": 6813, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:30:17.871913", "step": 6813, "epoch": 3 }, { "type": "loss", "content": 0.0010472764261066914, "timestamp": "2025-09-10 02:30:17.882138", "step": 6814, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:30:17.915235", "step": 6814, "epoch": 3 }, { "type": "loss", "content": 0.00010902778012678027, "timestamp": "2025-09-10 02:30:17.917728", "step": 6815, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:30:17.951419", "step": 6815, "epoch": 3 }, { "type": "loss", "content": 0.0002544302260503173, "timestamp": "2025-09-10 02:30:17.976575", "step": 6816, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:30:18.008867", "step": 6816, "epoch": 3 }, { "type": "loss", "content": 0.0003031869127880782, "timestamp": "2025-09-10 02:30:18.013772", "step": 6817, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:30:18.049470", "step": 6817, "epoch": 3 }, { "type": "loss", "content": 0.00049980339827016, "timestamp": "2025-09-10 02:30:18.060303", "step": 6818, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:30:18.097031", "step": 6818, "epoch": 3 }, { "type": "loss", "content": 0.00019091797003056854, "timestamp": "2025-09-10 02:30:18.110426", "step": 6819, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:30:18.143731", "step": 6819, "epoch": 3 }, { "type": "loss", "content": 0.0006083925254642963, "timestamp": "2025-09-10 02:30:18.168937", "step": 6820, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:30:18.203331", "step": 6820, "epoch": 3 }, { "type": "loss", "content": 3.496772114885971e-05, "timestamp": "2025-09-10 02:30:18.216022", "step": 6821, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:30:18.251835", "step": 6821, "epoch": 3 }, { "type": "loss", "content": 0.0001554272894281894, "timestamp": "2025-09-10 02:30:18.255935", "step": 6822, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:30:18.291116", "step": 6822, "epoch": 3 }, { "type": "loss", "content": 0.0005963979056105018, "timestamp": "2025-09-10 02:30:18.301097", "step": 6823, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:30:18.332866", "step": 6823, "epoch": 3 }, { "type": "loss", "content": 8.008737495401874e-05, "timestamp": "2025-09-10 02:30:18.364544", "step": 6824, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:30:18.399060", "step": 6824, "epoch": 3 }, { "type": "loss", "content": 0.00042097517871297896, "timestamp": "2025-09-10 02:30:18.411561", "step": 6825, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:30:18.448099", "step": 6825, "epoch": 3 }, { "type": "loss", "content": 6.969293463043869e-05, "timestamp": "2025-09-10 02:30:18.457796", "step": 6826, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:30:18.493630", "step": 6826, "epoch": 3 }, { "type": "loss", "content": 0.0014862669631838799, "timestamp": "2025-09-10 02:30:18.501335", "step": 6827, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:30:18.535521", "step": 6827, "epoch": 3 }, { "type": "loss", "content": 0.00019058329053223133, "timestamp": "2025-09-10 02:30:18.563006", "step": 6828, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:30:18.601204", "step": 6828, "epoch": 3 }, { "type": "loss", "content": 0.0002576792612671852, "timestamp": "2025-09-10 02:30:18.605856", "step": 6829, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:30:18.637920", "step": 6829, "epoch": 3 }, { "type": "loss", "content": 0.0004352860269136727, "timestamp": "2025-09-10 02:30:18.645472", "step": 6830, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:30:18.681478", "step": 6830, "epoch": 3 }, { "type": "loss", "content": 3.436298720771447e-05, "timestamp": "2025-09-10 02:30:18.688357", "step": 6831, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:30:18.724012", "step": 6831, "epoch": 3 }, { "type": "loss", "content": 0.00022188770526554435, "timestamp": "2025-09-10 02:30:18.749357", "step": 6832, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 144 ], "flops": 4271696270016 }, "timestamp": "2025-09-10 02:30:18.780158", "step": 6832, "epoch": 3 }, { "type": "loss", "content": 0.0015023789601400495, "timestamp": "2025-09-10 02:30:18.784655", "step": 6833, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:30:18.820484", "step": 6833, "epoch": 3 }, { "type": "loss", "content": 0.0012278666254132986, "timestamp": "2025-09-10 02:30:18.832468", "step": 6834, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:30:18.865085", "step": 6834, "epoch": 3 }, { "type": "loss", "content": 0.005471336655318737, "timestamp": "2025-09-10 02:30:18.869075", "step": 6835, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:30:18.901174", "step": 6835, "epoch": 3 }, { "type": "loss", "content": 0.0007918172632344067, "timestamp": "2025-09-10 02:30:18.929621", "step": 6836, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:30:18.960867", "step": 6836, "epoch": 3 }, { "type": "loss", "content": 0.00016386432980652899, "timestamp": "2025-09-10 02:30:18.968220", "step": 6837, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:30:18.998902", "step": 6837, "epoch": 3 }, { "type": "loss", "content": 0.00040015511331148446, "timestamp": "2025-09-10 02:30:19.005819", "step": 6838, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:30:19.039481", "step": 6838, "epoch": 3 }, { "type": "loss", "content": 0.0003809529298450798, "timestamp": "2025-09-10 02:30:19.050226", "step": 6839, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:30:19.089016", "step": 6839, "epoch": 3 }, { "type": "loss", "content": 0.00012751105532515794, "timestamp": "2025-09-10 02:30:19.116968", "step": 6840, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:30:19.154062", "step": 6840, "epoch": 3 }, { "type": "loss", "content": 0.0002030668401857838, "timestamp": "2025-09-10 02:30:19.156202", "step": 6841, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:30:19.196680", "step": 6841, "epoch": 3 }, { "type": "loss", "content": 8.849247387843207e-05, "timestamp": "2025-09-10 02:30:19.207016", "step": 6842, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:30:19.240281", "step": 6842, "epoch": 3 }, { "type": "loss", "content": 7.702614675508812e-05, "timestamp": "2025-09-10 02:30:19.242811", "step": 6843, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:30:19.274075", "step": 6843, "epoch": 3 }, { "type": "loss", "content": 0.00011114530934719369, "timestamp": "2025-09-10 02:30:19.299505", "step": 6844, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:30:19.330251", "step": 6844, "epoch": 3 }, { "type": "loss", "content": 0.00036023682332597673, "timestamp": "2025-09-10 02:30:19.332826", "step": 6845, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:30:19.363969", "step": 6845, "epoch": 3 }, { "type": "loss", "content": 0.00022267237363848835, "timestamp": "2025-09-10 02:30:19.374234", "step": 6846, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:30:19.413588", "step": 6846, "epoch": 3 }, { "type": "loss", "content": 0.00026117305969819427, "timestamp": "2025-09-10 02:30:19.426145", "step": 6847, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:30:19.457215", "step": 6847, "epoch": 3 }, { "type": "loss", "content": 0.00017910859605763108, "timestamp": "2025-09-10 02:30:19.484847", "step": 6848, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:30:19.518918", "step": 6848, "epoch": 3 }, { "type": "loss", "content": 0.00025339677813462913, "timestamp": "2025-09-10 02:30:19.528648", "step": 6849, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-10 02:30:19.569357", "step": 6849, "epoch": 3 }, { "type": "loss", "content": 0.00041114582563750446, "timestamp": "2025-09-10 02:30:19.585004", "step": 6850, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-10 02:30:19.626859", "step": 6850, "epoch": 3 }, { "type": "loss", "content": 0.002163324737921357, "timestamp": "2025-09-10 02:30:19.640712", "step": 6851, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:30:19.677256", "step": 6851, "epoch": 3 }, { "type": "loss", "content": 0.00018701299268286675, "timestamp": "2025-09-10 02:30:19.705133", "step": 6852, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:30:19.738947", "step": 6852, "epoch": 3 }, { "type": "loss", "content": 0.0005152305820956826, "timestamp": "2025-09-10 02:30:19.751590", "step": 6853, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:30:19.785229", "step": 6853, "epoch": 3 }, { "type": "loss", "content": 0.007716981228441, "timestamp": "2025-09-10 02:30:19.792581", "step": 6854, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:30:19.823461", "step": 6854, "epoch": 3 }, { "type": "loss", "content": 0.0002965559542644769, "timestamp": "2025-09-10 02:30:19.826056", "step": 6855, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:30:19.862266", "step": 6855, "epoch": 3 }, { "type": "loss", "content": 0.00017139650299213827, "timestamp": "2025-09-10 02:30:19.893207", "step": 6856, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:30:19.928880", "step": 6856, "epoch": 3 }, { "type": "loss", "content": 0.00021169218234717846, "timestamp": "2025-09-10 02:30:19.936746", "step": 6857, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:30:19.976093", "step": 6857, "epoch": 3 }, { "type": "loss", "content": 0.00011072350753238425, "timestamp": "2025-09-10 02:30:19.989442", "step": 6858, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:30:20.022347", "step": 6858, "epoch": 3 }, { "type": "loss", "content": 0.0019564726389944553, "timestamp": "2025-09-10 02:30:20.029521", "step": 6859, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:30:20.062143", "step": 6859, "epoch": 3 }, { "type": "loss", "content": 0.00042336867772974074, "timestamp": "2025-09-10 02:30:20.095512", "step": 6860, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:30:20.133531", "step": 6860, "epoch": 3 }, { "type": "loss", "content": 0.00017614095122553408, "timestamp": "2025-09-10 02:30:20.138738", "step": 6861, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:30:20.174233", "step": 6861, "epoch": 3 }, { "type": "loss", "content": 0.00017980553093366325, "timestamp": "2025-09-10 02:30:20.184889", "step": 6862, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:30:20.222653", "step": 6862, "epoch": 3 }, { "type": "loss", "content": 7.931239815661684e-05, "timestamp": "2025-09-10 02:30:20.230060", "step": 6863, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 144 ], "flops": 4271696270016 }, "timestamp": "2025-09-10 02:30:20.260929", "step": 6863, "epoch": 3 }, { "type": "loss", "content": 0.02051333151757717, "timestamp": "2025-09-10 02:30:20.285056", "step": 6864, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-10 02:30:20.325514", "step": 6864, "epoch": 3 }, { "type": "loss", "content": 8.088215690804645e-05, "timestamp": "2025-09-10 02:30:20.338810", "step": 6865, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:30:20.372656", "step": 6865, "epoch": 3 }, { "type": "loss", "content": 0.0001564481353852898, "timestamp": "2025-09-10 02:30:20.379373", "step": 6866, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:30:20.414627", "step": 6866, "epoch": 3 }, { "type": "loss", "content": 0.00010649115574778989, "timestamp": "2025-09-10 02:30:20.427159", "step": 6867, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:30:20.457949", "step": 6867, "epoch": 3 }, { "type": "loss", "content": 0.0001269724016310647, "timestamp": "2025-09-10 02:30:20.486703", "step": 6868, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:30:20.524304", "step": 6868, "epoch": 3 }, { "type": "loss", "content": 0.0010912258876487613, "timestamp": "2025-09-10 02:30:20.529129", "step": 6869, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:30:20.560997", "step": 6869, "epoch": 3 }, { "type": "loss", "content": 9.636014874558896e-05, "timestamp": "2025-09-10 02:30:20.567548", "step": 6870, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-10 02:30:20.602864", "step": 6870, "epoch": 3 }, { "type": "loss", "content": 0.000113781621621456, "timestamp": "2025-09-10 02:30:20.616611", "step": 6871, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:30:20.654164", "step": 6871, "epoch": 3 }, { "type": "loss", "content": 9.113257692661136e-05, "timestamp": "2025-09-10 02:30:20.685367", "step": 6872, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:30:20.715516", "step": 6872, "epoch": 3 }, { "type": "loss", "content": 0.0006366458837874234, "timestamp": "2025-09-10 02:30:20.720287", "step": 6873, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:30:20.751250", "step": 6873, "epoch": 3 }, { "type": "loss", "content": 0.0002216670400230214, "timestamp": "2025-09-10 02:30:20.758358", "step": 6874, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:30:20.797148", "step": 6874, "epoch": 3 }, { "type": "loss", "content": 0.0001820830802898854, "timestamp": "2025-09-10 02:30:20.809371", "step": 6875, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:30:20.848562", "step": 6875, "epoch": 3 }, { "type": "loss", "content": 0.005944172386080027, "timestamp": "2025-09-10 02:30:20.876414", "step": 6876, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:30:20.909153", "step": 6876, "epoch": 3 }, { "type": "loss", "content": 5.662976036546752e-05, "timestamp": "2025-09-10 02:30:20.911434", "step": 6877, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-10 02:30:20.951774", "step": 6877, "epoch": 3 }, { "type": "loss", "content": 0.00043625704711303115, "timestamp": "2025-09-10 02:30:20.965576", "step": 6878, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:30:21.004372", "step": 6878, "epoch": 3 }, { "type": "loss", "content": 0.00010746198677225038, "timestamp": "2025-09-10 02:30:21.012127", "step": 6879, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:30:21.044248", "step": 6879, "epoch": 3 }, { "type": "loss", "content": 0.00030602794140577316, "timestamp": "2025-09-10 02:30:21.075223", "step": 6880, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:30:21.114259", "step": 6880, "epoch": 3 }, { "type": "loss", "content": 0.00037156790494918823, "timestamp": "2025-09-10 02:30:21.121592", "step": 6881, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:30:21.152891", "step": 6881, "epoch": 3 }, { "type": "loss", "content": 0.00018727740098256618, "timestamp": "2025-09-10 02:30:21.163096", "step": 6882, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:30:21.198073", "step": 6882, "epoch": 3 }, { "type": "loss", "content": 9.839085396379232e-05, "timestamp": "2025-09-10 02:30:21.202551", "step": 6883, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:30:21.233647", "step": 6883, "epoch": 3 }, { "type": "loss", "content": 0.000886984693352133, "timestamp": "2025-09-10 02:30:21.261268", "step": 6884, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:30:21.294025", "step": 6884, "epoch": 3 }, { "type": "loss", "content": 0.00016462511848658323, "timestamp": "2025-09-10 02:30:21.301922", "step": 6885, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:30:21.333917", "step": 6885, "epoch": 3 }, { "type": "loss", "content": 0.0043139709159731865, "timestamp": "2025-09-10 02:30:21.340952", "step": 6886, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:30:21.380516", "step": 6886, "epoch": 3 }, { "type": "loss", "content": 0.0008590264478698373, "timestamp": "2025-09-10 02:30:21.387960", "step": 6887, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:30:21.424227", "step": 6887, "epoch": 3 }, { "type": "loss", "content": 0.0003677209315355867, "timestamp": "2025-09-10 02:30:21.452110", "step": 6888, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:30:21.482477", "step": 6888, "epoch": 3 }, { "type": "loss", "content": 9.307049185736105e-05, "timestamp": "2025-09-10 02:30:21.491069", "step": 6889, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:30:21.521828", "step": 6889, "epoch": 3 }, { "type": "loss", "content": 8.364223322132602e-05, "timestamp": "2025-09-10 02:30:21.534406", "step": 6890, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:30:21.570779", "step": 6890, "epoch": 3 }, { "type": "loss", "content": 0.00047637257375754416, "timestamp": "2025-09-10 02:30:21.574792", "step": 6891, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:30:21.607469", "step": 6891, "epoch": 3 }, { "type": "loss", "content": 0.0028176165651530027, "timestamp": "2025-09-10 02:30:21.632477", "step": 6892, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:30:21.697379", "step": 6892, "epoch": 3 }, { "type": "loss", "content": 0.0002470030449330807, "timestamp": "2025-09-10 02:30:21.699510", "step": 6893, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:30:21.735128", "step": 6893, "epoch": 3 }, { "type": "loss", "content": 0.0006550102843903005, "timestamp": "2025-09-10 02:30:21.748541", "step": 6894, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:30:21.789661", "step": 6894, "epoch": 3 }, { "type": "loss", "content": 0.00011021040700143203, "timestamp": "2025-09-10 02:30:21.796606", "step": 6895, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:30:21.831675", "step": 6895, "epoch": 3 }, { "type": "loss", "content": 0.0010865674121305346, "timestamp": "2025-09-10 02:30:21.860184", "step": 6896, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:30:21.893488", "step": 6896, "epoch": 3 }, { "type": "loss", "content": 0.0001547595311421901, "timestamp": "2025-09-10 02:30:21.896158", "step": 6897, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:30:21.926703", "step": 6897, "epoch": 3 }, { "type": "loss", "content": 0.00014410761650651693, "timestamp": "2025-09-10 02:30:21.937151", "step": 6898, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:30:21.967455", "step": 6898, "epoch": 3 }, { "type": "loss", "content": 0.0005947855534031987, "timestamp": "2025-09-10 02:30:21.971499", "step": 6899, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:30:22.002647", "step": 6899, "epoch": 3 }, { "type": "loss", "content": 0.00025538000045344234, "timestamp": "2025-09-10 02:30:22.027939", "step": 6900, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-10 02:30:22.060817", "step": 6900, "epoch": 3 }, { "type": "loss", "content": 0.0005543892038986087, "timestamp": "2025-09-10 02:30:22.073926", "step": 6901, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:30:22.114769", "step": 6901, "epoch": 3 }, { "type": "loss", "content": 0.0021151488181203604, "timestamp": "2025-09-10 02:30:22.121566", "step": 6902, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:30:22.162625", "step": 6902, "epoch": 3 }, { "type": "loss", "content": 0.00011541605636011809, "timestamp": "2025-09-10 02:30:22.166933", "step": 6903, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:30:22.197743", "step": 6903, "epoch": 3 }, { "type": "loss", "content": 0.0013175479834899306, "timestamp": "2025-09-10 02:30:22.226283", "step": 6904, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 464 ], "flops": 13763770790336 }, "timestamp": "2025-09-10 02:30:22.281595", "step": 6904, "epoch": 3 }, { "type": "loss", "content": 3.248906432418153e-05, "timestamp": "2025-09-10 02:30:22.298306", "step": 6905, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:30:22.328426", "step": 6905, "epoch": 3 }, { "type": "loss", "content": 4.818878369405866e-05, "timestamp": "2025-09-10 02:30:22.330887", "step": 6906, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:30:22.361346", "step": 6906, "epoch": 3 }, { "type": "loss", "content": 0.0005144443712197244, "timestamp": "2025-09-10 02:30:22.368298", "step": 6907, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:30:22.400777", "step": 6907, "epoch": 3 }, { "type": "loss", "content": 4.7616198571631685e-05, "timestamp": "2025-09-10 02:30:22.434318", "step": 6908, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:30:22.469807", "step": 6908, "epoch": 3 }, { "type": "loss", "content": 0.00013233958452474326, "timestamp": "2025-09-10 02:30:22.474890", "step": 6909, "epoch": 3 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 736 ], "batch_size": 8, "flops": 14554433988352 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 480 ], "batch_size": 8, "flops": 9492022189824 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 544 ], "batch_size": 8, "flops": 10757625139456 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 624 ], "batch_size": 8, "flops": 12339628826496 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 576 ], "batch_size": 8, "flops": 11390426614272 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 432 ], "batch_size": 8, "flops": 8542819977600 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 432 ], "batch_size": 8, "flops": 8542819977600 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 128 ], "batch_size": 8, "flops": 2531205966848 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 544 ], "batch_size": 8, "flops": 10757625139456 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 656 ], "batch_size": 8, "flops": 12972430301312 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 128 ], "batch_size": 8, "flops": 2531205966848 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 464 ], "batch_size": 8, "flops": 9175621452416 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 560 ], "batch_size": 8, "flops": 11074025876864 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 576 ], "batch_size": 8, "flops": 11390426614272 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 1168 ], "batch_size": 8, "flops": 23097253898368 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 640 ], "batch_size": 8, "flops": 12656029563904 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 6, 208 ], "batch_size": 8, "flops": 4113209653888 } ], "timestamp": "2025-09-10 02:30:32.571318", "step": 6909, "epoch": 3 }, { "type": "pplx", "content": 24853930.15520345, "timestamp": "2025-09-10 02:30:32.574149", "step": 6909, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:30:32.608153", "step": 6909, "epoch": 3 }, { "type": "loss", "content": 9.375996887683868e-05, "timestamp": "2025-09-10 02:30:32.616912", "step": 6910, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:30:32.650941", "step": 6910, "epoch": 3 }, { "type": "loss", "content": 0.0002628415822982788, "timestamp": "2025-09-10 02:30:32.654724", "step": 6911, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:30:32.687238", "step": 6911, "epoch": 3 }, { "type": "loss", "content": 0.0002938093966804445, "timestamp": "2025-09-10 02:30:32.711218", "step": 6912, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 144 ], "flops": 4271696270016 }, "timestamp": "2025-09-10 02:30:32.742241", "step": 6912, "epoch": 3 }, { "type": "loss", "content": 0.0008156453259289265, "timestamp": "2025-09-10 02:30:32.744693", "step": 6913, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:30:32.775489", "step": 6913, "epoch": 3 }, { "type": "loss", "content": 0.00015855650417506695, "timestamp": "2025-09-10 02:30:32.779710", "step": 6914, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:30:32.811619", "step": 6914, "epoch": 3 }, { "type": "loss", "content": 6.417724216589704e-05, "timestamp": "2025-09-10 02:30:32.815751", "step": 6915, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:30:32.847821", "step": 6915, "epoch": 3 }, { "type": "loss", "content": 0.011772527359426022, "timestamp": "2025-09-10 02:30:32.875766", "step": 6916, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:30:32.908515", "step": 6916, "epoch": 3 }, { "type": "loss", "content": 0.0008328754338435829, "timestamp": "2025-09-10 02:30:32.912764", "step": 6917, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:30:32.944174", "step": 6917, "epoch": 3 }, { "type": "loss", "content": 0.00012226430408190936, "timestamp": "2025-09-10 02:30:32.948156", "step": 6918, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:30:32.979169", "step": 6918, "epoch": 3 }, { "type": "loss", "content": 0.00023380214406643063, "timestamp": "2025-09-10 02:30:32.985922", "step": 6919, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:30:33.017286", "step": 6919, "epoch": 3 }, { "type": "loss", "content": 0.0002332628209842369, "timestamp": "2025-09-10 02:30:33.049690", "step": 6920, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:30:33.081821", "step": 6920, "epoch": 3 }, { "type": "loss", "content": 0.000248458469286561, "timestamp": "2025-09-10 02:30:33.086063", "step": 6921, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:30:33.117484", "step": 6921, "epoch": 3 }, { "type": "loss", "content": 0.00023750065884087235, "timestamp": "2025-09-10 02:30:33.127207", "step": 6922, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:30:33.158861", "step": 6922, "epoch": 3 }, { "type": "loss", "content": 0.00011210433149244636, "timestamp": "2025-09-10 02:30:33.165640", "step": 6923, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:30:33.198408", "step": 6923, "epoch": 3 }, { "type": "loss", "content": 0.00017960583500098437, "timestamp": "2025-09-10 02:30:33.226801", "step": 6924, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:30:33.259757", "step": 6924, "epoch": 3 }, { "type": "loss", "content": 0.00010823294724104926, "timestamp": "2025-09-10 02:30:33.264770", "step": 6925, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:30:33.298052", "step": 6925, "epoch": 3 }, { "type": "loss", "content": 0.001127683324739337, "timestamp": "2025-09-10 02:30:33.304551", "step": 6926, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:30:33.347085", "step": 6926, "epoch": 3 }, { "type": "loss", "content": 0.0002121505531249568, "timestamp": "2025-09-10 02:30:33.353818", "step": 6927, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:30:33.387708", "step": 6927, "epoch": 3 }, { "type": "loss", "content": 0.00015519419685006142, "timestamp": "2025-09-10 02:30:33.415510", "step": 6928, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:30:33.458245", "step": 6928, "epoch": 3 }, { "type": "loss", "content": 0.0001299724681302905, "timestamp": "2025-09-10 02:30:33.466105", "step": 6929, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:30:33.500059", "step": 6929, "epoch": 3 }, { "type": "loss", "content": 0.0001691354700597003, "timestamp": "2025-09-10 02:30:33.507491", "step": 6930, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:30:33.539099", "step": 6930, "epoch": 3 }, { "type": "loss", "content": 8.918951789382845e-05, "timestamp": "2025-09-10 02:30:33.546449", "step": 6931, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:30:33.579426", "step": 6931, "epoch": 3 }, { "type": "loss", "content": 0.00011090342741226777, "timestamp": "2025-09-10 02:30:33.604561", "step": 6932, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:30:33.636469", "step": 6932, "epoch": 3 }, { "type": "loss", "content": 0.00038635358214378357, "timestamp": "2025-09-10 02:30:33.640763", "step": 6933, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:30:33.675256", "step": 6933, "epoch": 3 }, { "type": "loss", "content": 0.0012322509428486228, "timestamp": "2025-09-10 02:30:33.686697", "step": 6934, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:30:33.719036", "step": 6934, "epoch": 3 }, { "type": "loss", "content": 0.0012386480811983347, "timestamp": "2025-09-10 02:30:33.726134", "step": 6935, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:30:33.758568", "step": 6935, "epoch": 3 }, { "type": "loss", "content": 3.629237107816152e-05, "timestamp": "2025-09-10 02:30:33.786432", "step": 6936, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:30:33.819712", "step": 6936, "epoch": 3 }, { "type": "loss", "content": 7.105556142050773e-05, "timestamp": "2025-09-10 02:30:33.823893", "step": 6937, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:30:33.859795", "step": 6937, "epoch": 3 }, { "type": "loss", "content": 9.639743802836165e-05, "timestamp": "2025-09-10 02:30:33.869595", "step": 6938, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:30:33.904854", "step": 6938, "epoch": 3 }, { "type": "loss", "content": 6.235136243049055e-05, "timestamp": "2025-09-10 02:30:33.914782", "step": 6939, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:30:33.947402", "step": 6939, "epoch": 3 }, { "type": "loss", "content": 0.0002960147976409644, "timestamp": "2025-09-10 02:30:33.979908", "step": 6940, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:30:34.012860", "step": 6940, "epoch": 3 }, { "type": "loss", "content": 0.007601078599691391, "timestamp": "2025-09-10 02:30:34.024405", "step": 6941, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:30:34.057014", "step": 6941, "epoch": 3 }, { "type": "loss", "content": 3.58233337465208e-05, "timestamp": "2025-09-10 02:30:34.063787", "step": 6942, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 144 ], "flops": 4271696270016 }, "timestamp": "2025-09-10 02:30:34.100482", "step": 6942, "epoch": 3 }, { "type": "loss", "content": 0.0015243064844980836, "timestamp": "2025-09-10 02:30:34.103494", "step": 6943, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:30:34.138373", "step": 6943, "epoch": 3 }, { "type": "loss", "content": 0.00024156909785233438, "timestamp": "2025-09-10 02:30:34.165987", "step": 6944, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 480 ], "flops": 14238374516352 }, "timestamp": "2025-09-10 02:30:34.216927", "step": 6944, "epoch": 3 }, { "type": "loss", "content": 0.00428308779373765, "timestamp": "2025-09-10 02:30:34.233895", "step": 6945, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:30:34.272835", "step": 6945, "epoch": 3 }, { "type": "loss", "content": 0.029404859989881516, "timestamp": "2025-09-10 02:30:34.282825", "step": 6946, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:30:34.329763", "step": 6946, "epoch": 3 }, { "type": "loss", "content": 0.00013546801346819848, "timestamp": "2025-09-10 02:30:34.337397", "step": 6947, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:30:34.375921", "step": 6947, "epoch": 3 }, { "type": "loss", "content": 3.598109833546914e-05, "timestamp": "2025-09-10 02:30:34.404145", "step": 6948, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:30:34.439246", "step": 6948, "epoch": 3 }, { "type": "loss", "content": 0.00020628688798751682, "timestamp": "2025-09-10 02:30:34.442744", "step": 6949, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-10 02:30:34.487136", "step": 6949, "epoch": 3 }, { "type": "loss", "content": 0.00010723454033723101, "timestamp": "2025-09-10 02:30:34.503012", "step": 6950, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:30:34.539197", "step": 6950, "epoch": 3 }, { "type": "loss", "content": 0.00011623270984273404, "timestamp": "2025-09-10 02:30:34.547880", "step": 6951, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:30:34.586654", "step": 6951, "epoch": 3 }, { "type": "loss", "content": 0.0001242592406924814, "timestamp": "2025-09-10 02:30:34.611887", "step": 6952, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:30:34.643970", "step": 6952, "epoch": 3 }, { "type": "loss", "content": 0.0004370961687527597, "timestamp": "2025-09-10 02:30:34.649214", "step": 6953, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:30:34.690287", "step": 6953, "epoch": 3 }, { "type": "loss", "content": 0.0002730258565861732, "timestamp": "2025-09-10 02:30:34.700991", "step": 6954, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:30:34.736851", "step": 6954, "epoch": 3 }, { "type": "loss", "content": 0.00011180860747117549, "timestamp": "2025-09-10 02:30:34.744122", "step": 6955, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:30:34.783004", "step": 6955, "epoch": 3 }, { "type": "loss", "content": 0.0004824143834412098, "timestamp": "2025-09-10 02:30:34.817426", "step": 6956, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 144 ], "flops": 4271696270016 }, "timestamp": "2025-09-10 02:30:34.849646", "step": 6956, "epoch": 3 }, { "type": "loss", "content": 0.00022752817312721163, "timestamp": "2025-09-10 02:30:34.851825", "step": 6957, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:30:34.883349", "step": 6957, "epoch": 3 }, { "type": "loss", "content": 0.0062463474459946156, "timestamp": "2025-09-10 02:30:34.890202", "step": 6958, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:30:34.922061", "step": 6958, "epoch": 3 }, { "type": "loss", "content": 5.5043336033122614e-05, "timestamp": "2025-09-10 02:30:34.928820", "step": 6959, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 496 ], "flops": 14712978242368 }, "timestamp": "2025-09-10 02:30:34.972076", "step": 6959, "epoch": 3 }, { "type": "loss", "content": 0.0005719130276702344, "timestamp": "2025-09-10 02:30:35.010467", "step": 6960, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:30:35.043747", "step": 6960, "epoch": 3 }, { "type": "loss", "content": 0.00044621675624512136, "timestamp": "2025-09-10 02:30:35.050415", "step": 6961, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:30:35.084469", "step": 6961, "epoch": 3 }, { "type": "loss", "content": 0.00034819470602087677, "timestamp": "2025-09-10 02:30:35.094215", "step": 6962, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:30:35.126745", "step": 6962, "epoch": 3 }, { "type": "loss", "content": 0.00016455540026072413, "timestamp": "2025-09-10 02:30:35.133099", "step": 6963, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 144 ], "flops": 4271696270016 }, "timestamp": "2025-09-10 02:30:35.164791", "step": 6963, "epoch": 3 }, { "type": "loss", "content": 5.0337421271251515e-05, "timestamp": "2025-09-10 02:30:35.188773", "step": 6964, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:30:35.221492", "step": 6964, "epoch": 3 }, { "type": "loss", "content": 0.00010189624299528077, "timestamp": "2025-09-10 02:30:35.226632", "step": 6965, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 416 ], "flops": 12339959612288 }, "timestamp": "2025-09-10 02:30:35.264821", "step": 6965, "epoch": 3 }, { "type": "loss", "content": 3.955454667448066e-05, "timestamp": "2025-09-10 02:30:35.280698", "step": 6966, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:30:35.314438", "step": 6966, "epoch": 3 }, { "type": "loss", "content": 0.0009393363143317401, "timestamp": "2025-09-10 02:30:35.324855", "step": 6967, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:30:35.356816", "step": 6967, "epoch": 3 }, { "type": "loss", "content": 3.750239193323068e-05, "timestamp": "2025-09-10 02:30:35.384330", "step": 6968, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:30:35.418787", "step": 6968, "epoch": 3 }, { "type": "loss", "content": 7.956552872201428e-05, "timestamp": "2025-09-10 02:30:35.423625", "step": 6969, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:30:35.459261", "step": 6969, "epoch": 3 }, { "type": "loss", "content": 0.0006180386990308762, "timestamp": "2025-09-10 02:30:35.466933", "step": 6970, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-10 02:30:35.502471", "step": 6970, "epoch": 3 }, { "type": "loss", "content": 0.00042301107896491885, "timestamp": "2025-09-10 02:30:35.516228", "step": 6971, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:30:35.547668", "step": 6971, "epoch": 3 }, { "type": "loss", "content": 0.00011797425395343453, "timestamp": "2025-09-10 02:30:35.575234", "step": 6972, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:30:35.607400", "step": 6972, "epoch": 3 }, { "type": "loss", "content": 1.7395021131960675e-05, "timestamp": "2025-09-10 02:30:35.612305", "step": 6973, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:30:35.643800", "step": 6973, "epoch": 3 }, { "type": "loss", "content": 4.854817962041125e-05, "timestamp": "2025-09-10 02:30:35.650850", "step": 6974, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:30:35.681964", "step": 6974, "epoch": 3 }, { "type": "loss", "content": 0.0004031723365187645, "timestamp": "2025-09-10 02:30:35.692077", "step": 6975, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:30:35.725419", "step": 6975, "epoch": 3 }, { "type": "loss", "content": 0.026183495298027992, "timestamp": "2025-09-10 02:30:35.749986", "step": 6976, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-10 02:30:35.787096", "step": 6976, "epoch": 3 }, { "type": "loss", "content": 1.9930823327740654e-05, "timestamp": "2025-09-10 02:30:35.802258", "step": 6977, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:30:35.835958", "step": 6977, "epoch": 3 }, { "type": "loss", "content": 3.449880387051962e-05, "timestamp": "2025-09-10 02:30:35.842859", "step": 6978, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:30:35.875898", "step": 6978, "epoch": 3 }, { "type": "loss", "content": 7.551814633188769e-05, "timestamp": "2025-09-10 02:30:35.886516", "step": 6979, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:30:35.920589", "step": 6979, "epoch": 3 }, { "type": "loss", "content": 0.00020103261340409517, "timestamp": "2025-09-10 02:30:35.954756", "step": 6980, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:30:35.987128", "step": 6980, "epoch": 3 }, { "type": "loss", "content": 0.0001782312901923433, "timestamp": "2025-09-10 02:30:35.996757", "step": 6981, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:30:36.032245", "step": 6981, "epoch": 3 }, { "type": "loss", "content": 5.386146222008392e-05, "timestamp": "2025-09-10 02:30:36.039500", "step": 6982, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:30:36.071475", "step": 6982, "epoch": 3 }, { "type": "loss", "content": 0.014369412325322628, "timestamp": "2025-09-10 02:30:36.083000", "step": 6983, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:30:36.114751", "step": 6983, "epoch": 3 }, { "type": "loss", "content": 0.00018798027304001153, "timestamp": "2025-09-10 02:30:36.142496", "step": 6984, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:30:36.174099", "step": 6984, "epoch": 3 }, { "type": "loss", "content": 0.00023625533503945917, "timestamp": "2025-09-10 02:30:36.186822", "step": 6985, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:30:36.218572", "step": 6985, "epoch": 3 }, { "type": "loss", "content": 8.370379509869963e-05, "timestamp": "2025-09-10 02:30:36.231242", "step": 6986, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:30:36.262615", "step": 6986, "epoch": 3 }, { "type": "loss", "content": 0.0004739656869787723, "timestamp": "2025-09-10 02:30:36.269571", "step": 6987, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 144 ], "flops": 4271696270016 }, "timestamp": "2025-09-10 02:30:36.300097", "step": 6987, "epoch": 3 }, { "type": "loss", "content": 1.8654181985766627e-05, "timestamp": "2025-09-10 02:30:36.323594", "step": 6988, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:30:36.353988", "step": 6988, "epoch": 3 }, { "type": "loss", "content": 7.558034121757373e-05, "timestamp": "2025-09-10 02:30:36.356105", "step": 6989, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:30:36.387777", "step": 6989, "epoch": 3 }, { "type": "loss", "content": 0.0002668748202268034, "timestamp": "2025-09-10 02:30:36.394720", "step": 6990, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:30:36.425215", "step": 6990, "epoch": 3 }, { "type": "loss", "content": 0.00021226401440799236, "timestamp": "2025-09-10 02:30:36.432234", "step": 6991, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:30:36.463874", "step": 6991, "epoch": 3 }, { "type": "loss", "content": 0.00016221609257627279, "timestamp": "2025-09-10 02:30:36.492197", "step": 6992, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:30:36.522883", "step": 6992, "epoch": 3 }, { "type": "loss", "content": 8.354503370355815e-05, "timestamp": "2025-09-10 02:30:36.528161", "step": 6993, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:30:36.559418", "step": 6993, "epoch": 3 }, { "type": "loss", "content": 2.1902184016653337e-05, "timestamp": "2025-09-10 02:30:36.569482", "step": 6994, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:30:36.602148", "step": 6994, "epoch": 3 }, { "type": "loss", "content": 0.00011977553367614746, "timestamp": "2025-09-10 02:30:36.608742", "step": 6995, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-10 02:30:36.645587", "step": 6995, "epoch": 3 }, { "type": "loss", "content": 0.0002517557586543262, "timestamp": "2025-09-10 02:30:36.680558", "step": 6996, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-10 02:30:36.714025", "step": 6996, "epoch": 3 }, { "type": "loss", "content": 9.101787145482376e-05, "timestamp": "2025-09-10 02:30:36.727174", "step": 6997, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:30:36.758526", "step": 6997, "epoch": 3 }, { "type": "loss", "content": 0.0001227896282216534, "timestamp": "2025-09-10 02:30:36.762573", "step": 6998, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 368 ], "flops": 10916148434240 }, "timestamp": "2025-09-10 02:30:36.797864", "step": 6998, "epoch": 3 }, { "type": "loss", "content": 7.544071559095755e-05, "timestamp": "2025-09-10 02:30:36.811692", "step": 6999, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:30:36.844402", "step": 6999, "epoch": 3 }, { "type": "loss", "content": 0.003692924277856946, "timestamp": "2025-09-10 02:30:36.872282", "step": 7000, "epoch": 3 }, { "type": "info", "content": "Checkpoint saved at step 7000", "timestamp": "2025-09-10 02:30:42.318498", "step": 7000, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:30:42.362451", "step": 7000, "epoch": 3 }, { "type": "loss", "content": 7.54517168388702e-05, "timestamp": "2025-09-10 02:30:42.366545", "step": 7001, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:30:42.398762", "step": 7001, "epoch": 3 }, { "type": "loss", "content": 7.800796447554603e-05, "timestamp": "2025-09-10 02:30:42.402311", "step": 7002, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:30:42.434025", "step": 7002, "epoch": 3 }, { "type": "loss", "content": 0.029476981610059738, "timestamp": "2025-09-10 02:30:42.440768", "step": 7003, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:30:42.472458", "step": 7003, "epoch": 3 }, { "type": "loss", "content": 0.0001302637974731624, "timestamp": "2025-09-10 02:30:42.496887", "step": 7004, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:30:42.530135", "step": 7004, "epoch": 3 }, { "type": "loss", "content": 0.011870044283568859, "timestamp": "2025-09-10 02:30:42.540086", "step": 7005, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:30:42.573458", "step": 7005, "epoch": 3 }, { "type": "loss", "content": 0.00021210841077845544, "timestamp": "2025-09-10 02:30:42.580218", "step": 7006, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:30:42.612231", "step": 7006, "epoch": 3 }, { "type": "loss", "content": 5.902814882574603e-05, "timestamp": "2025-09-10 02:30:42.619093", "step": 7007, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:30:42.652006", "step": 7007, "epoch": 3 }, { "type": "loss", "content": 0.0002512595965526998, "timestamp": "2025-09-10 02:30:42.682861", "step": 7008, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:30:42.718324", "step": 7008, "epoch": 3 }, { "type": "loss", "content": 0.0005681976908817887, "timestamp": "2025-09-10 02:30:42.726159", "step": 7009, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:30:42.761462", "step": 7009, "epoch": 3 }, { "type": "loss", "content": 0.0010124502005055547, "timestamp": "2025-09-10 02:30:42.766074", "step": 7010, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 144 ], "flops": 4271696270016 }, "timestamp": "2025-09-10 02:30:42.797001", "step": 7010, "epoch": 3 }, { "type": "loss", "content": 0.0010337395360693336, "timestamp": "2025-09-10 02:30:42.799307", "step": 7011, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:30:42.831691", "step": 7011, "epoch": 3 }, { "type": "loss", "content": 0.000359332247171551, "timestamp": "2025-09-10 02:30:42.860163", "step": 7012, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:30:42.892069", "step": 7012, "epoch": 3 }, { "type": "loss", "content": 0.0001721430744510144, "timestamp": "2025-09-10 02:30:42.896594", "step": 7013, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:30:42.928749", "step": 7013, "epoch": 3 }, { "type": "loss", "content": 0.0001188502719742246, "timestamp": "2025-09-10 02:30:42.939898", "step": 7014, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:30:42.975516", "step": 7014, "epoch": 3 }, { "type": "loss", "content": 0.00013335456606000662, "timestamp": "2025-09-10 02:30:42.979666", "step": 7015, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:30:43.012243", "step": 7015, "epoch": 3 }, { "type": "loss", "content": 7.617595110787079e-05, "timestamp": "2025-09-10 02:30:43.036918", "step": 7016, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:30:43.071708", "step": 7016, "epoch": 3 }, { "type": "loss", "content": 0.0004477399925235659, "timestamp": "2025-09-10 02:30:43.080414", "step": 7017, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:30:43.120632", "step": 7017, "epoch": 3 }, { "type": "loss", "content": 8.525385055691004e-05, "timestamp": "2025-09-10 02:30:43.134005", "step": 7018, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:30:43.167311", "step": 7018, "epoch": 3 }, { "type": "loss", "content": 0.0006595923332497478, "timestamp": "2025-09-10 02:30:43.174667", "step": 7019, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 400 ], "flops": 11865355886272 }, "timestamp": "2025-09-10 02:30:43.214268", "step": 7019, "epoch": 3 }, { "type": "loss", "content": 4.695385359809734e-05, "timestamp": "2025-09-10 02:30:43.250768", "step": 7020, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:30:43.281538", "step": 7020, "epoch": 3 }, { "type": "loss", "content": 0.00017301096522714943, "timestamp": "2025-09-10 02:30:43.299985", "step": 7021, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 480 ], "flops": 14238374516352 }, "timestamp": "2025-09-10 02:30:43.357493", "step": 7021, "epoch": 3 }, { "type": "loss", "content": 0.0001524223480373621, "timestamp": "2025-09-10 02:30:43.374840", "step": 7022, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:30:43.405945", "step": 7022, "epoch": 3 }, { "type": "loss", "content": 0.00013412600674200803, "timestamp": "2025-09-10 02:30:43.413011", "step": 7023, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:30:43.448479", "step": 7023, "epoch": 3 }, { "type": "loss", "content": 0.00017144733283203095, "timestamp": "2025-09-10 02:30:43.473773", "step": 7024, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:30:43.510354", "step": 7024, "epoch": 3 }, { "type": "loss", "content": 0.00034450864768587053, "timestamp": "2025-09-10 02:30:43.512714", "step": 7025, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:30:43.544358", "step": 7025, "epoch": 3 }, { "type": "loss", "content": 6.39140052953735e-05, "timestamp": "2025-09-10 02:30:43.551274", "step": 7026, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:30:43.584415", "step": 7026, "epoch": 3 }, { "type": "loss", "content": 0.0004727788909804076, "timestamp": "2025-09-10 02:30:43.591039", "step": 7027, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:30:43.622903", "step": 7027, "epoch": 3 }, { "type": "loss", "content": 0.0025320053100585938, "timestamp": "2025-09-10 02:30:43.654789", "step": 7028, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:30:43.690290", "step": 7028, "epoch": 3 }, { "type": "loss", "content": 0.00012714836339000612, "timestamp": "2025-09-10 02:30:43.697951", "step": 7029, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:30:43.733691", "step": 7029, "epoch": 3 }, { "type": "loss", "content": 8.670837996760383e-05, "timestamp": "2025-09-10 02:30:43.740638", "step": 7030, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 128 ], "flops": 3797092544000 }, "timestamp": "2025-09-10 02:30:43.773773", "step": 7030, "epoch": 3 }, { "type": "loss", "content": 0.011012664996087551, "timestamp": "2025-09-10 02:30:43.778300", "step": 7031, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:30:43.811604", "step": 7031, "epoch": 3 }, { "type": "loss", "content": 0.00020046999270562083, "timestamp": "2025-09-10 02:30:43.836388", "step": 7032, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 320 ], "flops": 9492337256192 }, "timestamp": "2025-09-10 02:30:43.869027", "step": 7032, "epoch": 3 }, { "type": "loss", "content": 0.00012668267299886793, "timestamp": "2025-09-10 02:30:43.878628", "step": 7033, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 160 ], "flops": 4746299996032 }, "timestamp": "2025-09-10 02:30:43.910975", "step": 7033, "epoch": 3 }, { "type": "loss", "content": 5.517674799193628e-05, "timestamp": "2025-09-10 02:30:43.913744", "step": 7034, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:30:43.947657", "step": 7034, "epoch": 3 }, { "type": "loss", "content": 0.0005423200782388449, "timestamp": "2025-09-10 02:30:43.954538", "step": 7035, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:30:43.990008", "step": 7035, "epoch": 3 }, { "type": "loss", "content": 0.0030603199265897274, "timestamp": "2025-09-10 02:30:44.024506", "step": 7036, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:30:44.062450", "step": 7036, "epoch": 3 }, { "type": "loss", "content": 5.9157235227758065e-05, "timestamp": "2025-09-10 02:30:44.075484", "step": 7037, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:30:44.117288", "step": 7037, "epoch": 3 }, { "type": "loss", "content": 3.580377597245388e-05, "timestamp": "2025-09-10 02:30:44.124431", "step": 7038, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:30:44.157272", "step": 7038, "epoch": 3 }, { "type": "loss", "content": 0.00011388809798518196, "timestamp": "2025-09-10 02:30:44.168210", "step": 7039, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:30:44.200882", "step": 7039, "epoch": 3 }, { "type": "loss", "content": 7.329420623136684e-05, "timestamp": "2025-09-10 02:30:44.232066", "step": 7040, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 384 ], "flops": 11390752160256 }, "timestamp": "2025-09-10 02:30:44.270763", "step": 7040, "epoch": 3 }, { "type": "loss", "content": 9.624590165913105e-05, "timestamp": "2025-09-10 02:30:44.284081", "step": 7041, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 464 ], "flops": 13763770790336 }, "timestamp": "2025-09-10 02:30:44.334268", "step": 7041, "epoch": 3 }, { "type": "loss", "content": 0.00018388082389719784, "timestamp": "2025-09-10 02:30:44.351364", "step": 7042, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:30:44.383125", "step": 7042, "epoch": 3 }, { "type": "loss", "content": 2.913156640715897e-05, "timestamp": "2025-09-10 02:30:44.387484", "step": 7043, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:30:44.419803", "step": 7043, "epoch": 3 }, { "type": "loss", "content": 0.0005409237928688526, "timestamp": "2025-09-10 02:30:44.445221", "step": 7044, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:30:44.480208", "step": 7044, "epoch": 3 }, { "type": "loss", "content": 2.076716918963939e-05, "timestamp": "2025-09-10 02:30:44.492853", "step": 7045, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 272 ], "flops": 8068526078144 }, "timestamp": "2025-09-10 02:30:44.528299", "step": 7045, "epoch": 3 }, { "type": "loss", "content": 0.00021151323744561523, "timestamp": "2025-09-10 02:30:44.538395", "step": 7046, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:30:44.573351", "step": 7046, "epoch": 3 }, { "type": "loss", "content": 4.7153665946098045e-05, "timestamp": "2025-09-10 02:30:44.580768", "step": 7047, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 304 ], "flops": 9017733530176 }, "timestamp": "2025-09-10 02:30:44.616096", "step": 7047, "epoch": 3 }, { "type": "loss", "content": 0.0007388163357973099, "timestamp": "2025-09-10 02:30:44.649270", "step": 7048, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:30:44.684828", "step": 7048, "epoch": 3 }, { "type": "loss", "content": 9.87757885013707e-05, "timestamp": "2025-09-10 02:30:44.688511", "step": 7049, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:30:44.720590", "step": 7049, "epoch": 3 }, { "type": "loss", "content": 5.2101851906627417e-05, "timestamp": "2025-09-10 02:30:44.728049", "step": 7050, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 208 ], "flops": 6170111174080 }, "timestamp": "2025-09-10 02:30:44.760527", "step": 7050, "epoch": 3 }, { "type": "loss", "content": 4.3754731450462714e-05, "timestamp": "2025-09-10 02:30:44.767356", "step": 7051, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 464 ], "flops": 13763770790336 }, "timestamp": "2025-09-10 02:30:44.810758", "step": 7051, "epoch": 3 }, { "type": "loss", "content": 0.0001889723789645359, "timestamp": "2025-09-10 02:30:44.848646", "step": 7052, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 224 ], "flops": 6644714900096 }, "timestamp": "2025-09-10 02:30:44.884716", "step": 7052, "epoch": 3 }, { "type": "loss", "content": 0.00012450621579773724, "timestamp": "2025-09-10 02:30:44.889308", "step": 7053, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:30:44.920683", "step": 7053, "epoch": 3 }, { "type": "loss", "content": 0.00014559333794750273, "timestamp": "2025-09-10 02:30:44.928109", "step": 7054, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 336 ], "flops": 9966940982208 }, "timestamp": "2025-09-10 02:30:44.964282", "step": 7054, "epoch": 3 }, { "type": "loss", "content": 0.00017697580915410072, "timestamp": "2025-09-10 02:30:44.977610", "step": 7055, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 448 ], "flops": 13289167064320 }, "timestamp": "2025-09-10 02:30:45.023083", "step": 7055, "epoch": 3 }, { "type": "loss", "content": 0.00017205542826559395, "timestamp": "2025-09-10 02:30:45.060332", "step": 7056, "epoch": 3 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 736 ], "batch_size": 8, "flops": 14554433988352 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 480 ], "batch_size": 8, "flops": 9492022189824 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 544 ], "batch_size": 8, "flops": 10757625139456 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 624 ], "batch_size": 8, "flops": 12339628826496 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 576 ], "batch_size": 8, "flops": 11390426614272 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 432 ], "batch_size": 8, "flops": 8542819977600 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 432 ], "batch_size": 8, "flops": 8542819977600 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 128 ], "batch_size": 8, "flops": 2531205966848 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 544 ], "batch_size": 8, "flops": 10757625139456 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 656 ], "batch_size": 8, "flops": 12972430301312 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 128 ], "batch_size": 8, "flops": 2531205966848 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 464 ], "batch_size": 8, "flops": 9175621452416 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 560 ], "batch_size": 8, "flops": 11074025876864 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 576 ], "batch_size": 8, "flops": 11390426614272 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 1168 ], "batch_size": 8, "flops": 23097253898368 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 640 ], "batch_size": 8, "flops": 12656029563904 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 6, 208 ], "batch_size": 8, "flops": 4113209653888 } ], "timestamp": "2025-09-10 02:30:55.373116", "step": 7056, "epoch": 3 }, { "type": "pplx", "content": 26144192.90405417, "timestamp": "2025-09-10 02:30:55.376345", "step": 7056, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:30:55.406505", "step": 7056, "epoch": 3 }, { "type": "loss", "content": 0.0005065679433755577, "timestamp": "2025-09-10 02:30:55.410743", "step": 7057, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:30:55.441447", "step": 7057, "epoch": 3 }, { "type": "loss", "content": 0.0013099844800308347, "timestamp": "2025-09-10 02:30:55.449030", "step": 7058, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 256 ], "flops": 7593922352128 }, "timestamp": "2025-09-10 02:30:55.481859", "step": 7058, "epoch": 3 }, { "type": "loss", "content": 3.810837370110676e-05, "timestamp": "2025-09-10 02:30:55.489460", "step": 7059, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:30:55.521106", "step": 7059, "epoch": 3 }, { "type": "loss", "content": 0.0017356050666421652, "timestamp": "2025-09-10 02:30:55.549027", "step": 7060, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:30:55.580238", "step": 7060, "epoch": 3 }, { "type": "loss", "content": 0.00034908336238004267, "timestamp": "2025-09-10 02:30:55.585395", "step": 7061, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:30:55.616476", "step": 7061, "epoch": 3 }, { "type": "loss", "content": 5.2377414249349385e-05, "timestamp": "2025-09-10 02:30:55.627386", "step": 7062, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 176 ], "flops": 5220903722048 }, "timestamp": "2025-09-10 02:30:55.657587", "step": 7062, "epoch": 3 }, { "type": "loss", "content": 0.0034754632506519556, "timestamp": "2025-09-10 02:30:55.661652", "step": 7063, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 352 ], "flops": 10441544708224 }, "timestamp": "2025-09-10 02:30:55.696164", "step": 7063, "epoch": 3 }, { "type": "loss", "content": 0.0006347990711219609, "timestamp": "2025-09-10 02:30:55.730783", "step": 7064, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:30:55.763253", "step": 7064, "epoch": 3 }, { "type": "loss", "content": 4.8621186579111964e-05, "timestamp": "2025-09-10 02:30:55.768247", "step": 7065, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 656 ], "flops": 19459015502528 }, "timestamp": "2025-09-10 02:30:55.823128", "step": 7065, "epoch": 3 }, { "type": "loss", "content": 3.0243045330280438e-05, "timestamp": "2025-09-10 02:30:55.846549", "step": 7066, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 288 ], "flops": 8543129804160 }, "timestamp": "2025-09-10 02:30:55.877340", "step": 7066, "epoch": 3 }, { "type": "loss", "content": 0.00047078271745704114, "timestamp": "2025-09-10 02:30:55.888271", "step": 7067, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 240 ], "flops": 7119318626112 }, "timestamp": "2025-09-10 02:30:55.919101", "step": 7067, "epoch": 3 }, { "type": "loss", "content": 4.756170528708026e-05, "timestamp": "2025-09-10 02:30:55.947586", "step": 7068, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 192 ], "flops": 5695507448064 }, "timestamp": "2025-09-10 02:30:55.978287", "step": 7068, "epoch": 3 }, { "type": "loss", "content": 0.0010376720456406474, "timestamp": "2025-09-10 02:30:55.980530", "step": 7069, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 528 ], "flops": 15662185694400 }, "timestamp": "2025-09-10 02:30:56.027381", "step": 7069, "epoch": 3 }, { "type": "loss", "content": 2.100633537338581e-05, "timestamp": "2025-09-10 02:30:56.046434", "step": 7070, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 3, 224 ], "flops": 4983601869792 }, "timestamp": "2025-09-10 02:30:56.078505", "step": 7070, "epoch": 3 }, { "type": "loss", "content": 4.26122423959896e-05, "timestamp": "2025-09-10 02:30:56.081611", "step": 7071, "epoch": 3 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 736 ], "batch_size": 8, "flops": 14554433988352 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 480 ], "batch_size": 8, "flops": 9492022189824 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 544 ], "batch_size": 8, "flops": 10757625139456 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 624 ], "batch_size": 8, "flops": 12339628826496 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 576 ], "batch_size": 8, "flops": 11390426614272 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 432 ], "batch_size": 8, "flops": 8542819977600 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 432 ], "batch_size": 8, "flops": 8542819977600 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 128 ], "batch_size": 8, "flops": 2531205966848 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 544 ], "batch_size": 8, "flops": 10757625139456 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 656 ], "batch_size": 8, "flops": 12972430301312 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 128 ], "batch_size": 8, "flops": 2531205966848 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 496 ], "batch_size": 8, "flops": 9808422927232 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 416 ], "batch_size": 8, "flops": 8226419240192 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 400 ], "batch_size": 8, "flops": 7910018502784 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 352 ], "batch_size": 8, "flops": 6960816290560 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 288 ], "batch_size": 8, "flops": 5695213340928 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 144 ], "batch_size": 8, "flops": 2847606704256 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 464 ], "batch_size": 8, "flops": 9175621452416 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 448 ], "batch_size": 8, "flops": 8859220715008 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 560 ], "batch_size": 8, "flops": 11074025876864 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 320 ], "batch_size": 8, "flops": 6328014815744 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 304 ], "batch_size": 8, "flops": 6011614078336 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 576 ], "batch_size": 8, "flops": 11390426614272 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 368 ], "batch_size": 8, "flops": 7277217027968 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 1168 ], "batch_size": 8, "flops": 23097253898368 }, { "type": "perplexity", "in_batch_dim": [ 8, 256 ], "batch_size": 8, "flops": 5062411866112 }, { "type": "perplexity", "in_batch_dim": [ 8, 160 ], "batch_size": 8, "flops": 3164007441664 }, { "type": "perplexity", "in_batch_dim": [ 8, 240 ], "batch_size": 8, "flops": 4746011128704 }, { "type": "perplexity", "in_batch_dim": [ 8, 640 ], "batch_size": 8, "flops": 12656029563904 }, { "type": "perplexity", "in_batch_dim": [ 8, 272 ], "batch_size": 8, "flops": 5378812603520 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 224 ], "batch_size": 8, "flops": 4429610391296 }, { "type": "perplexity", "in_batch_dim": [ 8, 384 ], "batch_size": 8, "flops": 7593617765376 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 8, 208 ], "batch_size": 8, "flops": 4113209653888 }, { "type": "perplexity", "in_batch_dim": [ 8, 192 ], "batch_size": 8, "flops": 3796808916480 }, { "type": "perplexity", "in_batch_dim": [ 8, 336 ], "batch_size": 8, "flops": 6644415553152 }, { "type": "perplexity", "in_batch_dim": [ 8, 176 ], "batch_size": 8, "flops": 3480408179072 }, { "type": "perplexity", "in_batch_dim": [ 6, 208 ], "batch_size": 8, "flops": 4113209653888 } ], "timestamp": "2025-09-10 02:31:06.337196", "step": 7071, "epoch": 3 }, { "type": "pplx", "content": 26398362.306229845, "timestamp": "2025-09-10 02:31:06.344312", "step": 7071, "epoch": 3 }, { "type": "best_pplx", "content": 12191892.104022551, "timestamp": "2025-09-10 02:31:06.346146", "step": 7071, "epoch": 3 }, { "type": "best_step", "content": 147, "timestamp": "2025-09-10 02:31:06.347858", "step": 7071, "epoch": 3 }, { "type": "total_pplx_flops", "content": 105693667713235200, "timestamp": "2025-09-10 02:31:06.349825", "step": 7071, "epoch": 3 }, { "type": "total_train_flops", "content": 53674555878669600, "timestamp": "2025-09-10 02:31:06.352004", "step": 7071, "epoch": 3 } ], "best_evals": { "pplx": { "score": 12191892.104022551, "step": 147 }, "rougel": { "precision": 0.8507645259938837, "recall": 0.8507645259938837, "fmeasure": 0.8507645259938837 } } }